diff --git a/CHANGELOG.md b/CHANGELOG.md index e4bbe04159..4b6c81e5fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai - Gateway: add `agents.create`, `agents.update`, `agents.delete` RPC methods for web UI agent management. (#11045) Thanks @advaitpaliwal. - Gateway: add node command allowlists (default-deny unknown node commands; configurable via `gateway.nodes.allowCommands` / `gateway.nodes.denyCommands`). (#11755) Thanks @mbelinky. - Plugins: add `device-pair` (Telegram `/pair` flow) and `phone-control` (iOS/Android node controls). (#11755) Thanks @mbelinky. +- iOS: add alpha iOS node app (Telegram setup-code pairing + Talk/Chat surfaces). (#11756) Thanks @mbelinky. ### Fixes diff --git a/apps/ios/README.md b/apps/ios/README.md index 7af4d5d5da..2e426c18d7 100644 --- a/apps/ios/README.md +++ b/apps/ios/README.md @@ -1,28 +1,66 @@ # OpenClaw (iOS) -Internal-only SwiftUI app scaffold. +This is an **alpha** iOS app that connects to an OpenClaw Gateway as a `role: node`. + +Expect rough edges: + +- UI and onboarding are changing quickly. +- Background behavior is not stable yet (foreground app is the supported mode right now). +- Permissions are opt-in and the app should be treated as sensitive while we harden it. + +## What It Does + +- Connects to a Gateway over `ws://` / `wss://` +- Pairs a new device (approved from your bot) +- Exposes phone services as node commands (camera, location, photos, calendar, reminders, etc; gated by iOS permissions) +- Provides Talk + Chat surfaces (alpha) + +## Pairing (Recommended Flow) + +If your Gateway has the `device-pair` plugin installed: + +1. In Telegram, message your bot: `/pair` +2. Copy the **setup code** message +3. On iOS: OpenClaw → Settings → Gateway → paste setup code → Connect +4. 
Back in Telegram: `/pair approve` + +## Build And Run + +Prereqs: + +- Xcode (current stable) +- `pnpm` +- `xcodegen` + +From the repo root: -## Lint/format (required) ```bash -brew install swiftformat swiftlint +pnpm install +pnpm ios:open ``` -## Generate the Xcode project +Then in Xcode: + +1. Select the `OpenClaw` scheme +2. Select a simulator or a connected device +3. Run + +If you're using a personal Apple Development team, you may need to change the bundle identifier in Xcode to a unique value so signing succeeds. + +## Build From CLI + +```bash +pnpm ios:build +``` + +## Tests + ```bash cd apps/ios xcodegen generate -open OpenClaw.xcodeproj +xcodebuild test -project OpenClaw.xcodeproj -scheme OpenClaw -destination "platform=iOS Simulator,name=iPhone 17" ``` -## Shared packages -- `../shared/OpenClawKit` — shared types/constants used by iOS (and later macOS bridge + gateway routing). +## Shared Code -## fastlane -```bash -brew install fastlane - -cd apps/ios -fastlane lanes -``` - -See `apps/ios/fastlane/SETUP.md` for App Store Connect auth + upload lanes. +- `apps/shared/OpenClawKit` contains the shared transport/types used by the iOS app. 
diff --git a/apps/ios/Sources/Calendar/CalendarService.swift b/apps/ios/Sources/Calendar/CalendarService.swift new file mode 100644 index 0000000000..9ac83dd392 --- /dev/null +++ b/apps/ios/Sources/Calendar/CalendarService.swift @@ -0,0 +1,167 @@ +import EventKit +import Foundation +import OpenClawKit + +final class CalendarService: CalendarServicing { + func events(params: OpenClawCalendarEventsParams) async throws -> OpenClawCalendarEventsPayload { + let store = EKEventStore() + let status = EKEventStore.authorizationStatus(for: .event) + let authorized = await Self.ensureAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Calendar", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_PERMISSION_REQUIRED: grant Calendar permission", + ]) + } + + let (start, end) = Self.resolveRange( + startISO: params.startISO, + endISO: params.endISO) + let predicate = store.predicateForEvents(withStart: start, end: end, calendars: nil) + let events = store.events(matching: predicate) + let limit = max(1, min(params.limit ?? 50, 500)) + let selected = Array(events.prefix(limit)) + + let formatter = ISO8601DateFormatter() + let payload = selected.map { event in + OpenClawCalendarEventPayload( + identifier: event.eventIdentifier ?? UUID().uuidString, + title: event.title ?? 
"(untitled)", + startISO: formatter.string(from: event.startDate), + endISO: formatter.string(from: event.endDate), + isAllDay: event.isAllDay, + location: event.location, + calendarTitle: event.calendar.title) + } + + return OpenClawCalendarEventsPayload(events: payload) + } + + func add(params: OpenClawCalendarAddParams) async throws -> OpenClawCalendarAddPayload { + let store = EKEventStore() + let status = EKEventStore.authorizationStatus(for: .event) + let authorized = await Self.ensureWriteAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Calendar", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_PERMISSION_REQUIRED: grant Calendar permission", + ]) + } + + let title = params.title.trimmingCharacters(in: .whitespacesAndNewlines) + guard !title.isEmpty else { + throw NSError(domain: "Calendar", code: 3, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_INVALID: title required", + ]) + } + + let formatter = ISO8601DateFormatter() + guard let start = formatter.date(from: params.startISO) else { + throw NSError(domain: "Calendar", code: 4, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_INVALID: startISO required", + ]) + } + guard let end = formatter.date(from: params.endISO) else { + throw NSError(domain: "Calendar", code: 5, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_INVALID: endISO required", + ]) + } + + let event = EKEvent(eventStore: store) + event.title = title + event.startDate = start + event.endDate = end + event.isAllDay = params.isAllDay ?? 
false + if let location = params.location?.trimmingCharacters(in: .whitespacesAndNewlines), !location.isEmpty { + event.location = location + } + if let notes = params.notes?.trimmingCharacters(in: .whitespacesAndNewlines), !notes.isEmpty { + event.notes = notes + } + event.calendar = try Self.resolveCalendar( + store: store, + calendarId: params.calendarId, + calendarTitle: params.calendarTitle) + + try store.save(event, span: .thisEvent) + + let payload = OpenClawCalendarEventPayload( + identifier: event.eventIdentifier ?? UUID().uuidString, + title: event.title ?? title, + startISO: formatter.string(from: event.startDate), + endISO: formatter.string(from: event.endDate), + isAllDay: event.isAllDay, + location: event.location, + calendarTitle: event.calendar.title) + + return OpenClawCalendarAddPayload(event: payload) + } + + private static func ensureAuthorization(store: EKEventStore, status: EKAuthorizationStatus) async -> Bool { + switch status { + case .authorized: + return true + case .notDetermined: + // Don’t prompt during node.invoke; prompts block the invoke and lead to timeouts. + return false + case .restricted, .denied: + return false + case .fullAccess: + return true + case .writeOnly: + return false + @unknown default: + return false + } + } + + private static func ensureWriteAuthorization(store: EKEventStore, status: EKAuthorizationStatus) async -> Bool { + switch status { + case .authorized, .fullAccess, .writeOnly: + return true + case .notDetermined: + // Don’t prompt during node.invoke; prompts block the invoke and lead to timeouts. + return false + case .restricted, .denied: + return false + @unknown default: + return false + } + } + + private static func resolveCalendar( + store: EKEventStore, + calendarId: String?, + calendarTitle: String?) 
throws -> EKCalendar + { + if let id = calendarId?.trimmingCharacters(in: .whitespacesAndNewlines), !id.isEmpty, + let calendar = store.calendar(withIdentifier: id) + { + return calendar + } + + if let title = calendarTitle?.trimmingCharacters(in: .whitespacesAndNewlines), !title.isEmpty { + if let calendar = store.calendars(for: .event).first(where: { + $0.title.compare(title, options: [.caseInsensitive, .diacriticInsensitive]) == .orderedSame + }) { + return calendar + } + throw NSError(domain: "Calendar", code: 6, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_NOT_FOUND: no calendar named \(title)", + ]) + } + + if let fallback = store.defaultCalendarForNewEvents { + return fallback + } + + throw NSError(domain: "Calendar", code: 7, userInfo: [ + NSLocalizedDescriptionKey: "CALENDAR_NOT_FOUND: no default calendar", + ]) + } + + private static func resolveRange(startISO: String?, endISO: String?) -> (Date, Date) { + let formatter = ISO8601DateFormatter() + let start = startISO.flatMap { formatter.date(from: $0) } ?? Date() + let end = endISO.flatMap { formatter.date(from: $0) } ?? 
start.addingTimeInterval(7 * 24 * 3600) + return (start, end) + } +} diff --git a/apps/ios/Sources/Capabilities/NodeCapabilityRouter.swift b/apps/ios/Sources/Capabilities/NodeCapabilityRouter.swift new file mode 100644 index 0000000000..6dbdd51eb8 --- /dev/null +++ b/apps/ios/Sources/Capabilities/NodeCapabilityRouter.swift @@ -0,0 +1,25 @@ +import Foundation +import OpenClawKit + +@MainActor +final class NodeCapabilityRouter { + enum RouterError: Error { + case unknownCommand + case handlerUnavailable + } + + typealias Handler = (BridgeInvokeRequest) async throws -> BridgeInvokeResponse + + private let handlers: [String: Handler] + + init(handlers: [String: Handler]) { + self.handlers = handlers + } + + func handle(_ request: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + guard let handler = handlers[request.command] else { + throw RouterError.unknownCommand + } + return try await handler(request) + } +} diff --git a/apps/ios/Sources/Chat/ChatSheet.swift b/apps/ios/Sources/Chat/ChatSheet.swift index 6b8fffd23d..bbed501cf7 100644 --- a/apps/ios/Sources/Chat/ChatSheet.swift +++ b/apps/ios/Sources/Chat/ChatSheet.swift @@ -6,14 +6,16 @@ struct ChatSheet: View { @Environment(\.dismiss) private var dismiss @State private var viewModel: OpenClawChatViewModel private let userAccent: Color? + private let agentName: String? - init(gateway: GatewayNodeSession, sessionKey: String, userAccent: Color? = nil) { + init(gateway: GatewayNodeSession, sessionKey: String, agentName: String? = nil, userAccent: Color? 
= nil) { let transport = IOSGatewayChatTransport(gateway: gateway) self._viewModel = State( initialValue: OpenClawChatViewModel( sessionKey: sessionKey, transport: transport)) self.userAccent = userAccent + self.agentName = agentName } var body: some View { @@ -22,7 +24,7 @@ struct ChatSheet: View { viewModel: self.viewModel, showsSessionSwitcher: true, userAccent: self.userAccent) - .navigationTitle("Chat") + .navigationTitle(self.chatTitle) .navigationBarTitleDisplayMode(.inline) .toolbar { ToolbarItem(placement: .topBarTrailing) { @@ -36,4 +38,10 @@ struct ChatSheet: View { } } } + + private var chatTitle: String { + let trimmed = (self.agentName ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty { return "Chat" } + return "Chat (\(trimmed))" + } } diff --git a/apps/ios/Sources/Contacts/ContactsService.swift b/apps/ios/Sources/Contacts/ContactsService.swift new file mode 100644 index 0000000000..db203d070f --- /dev/null +++ b/apps/ios/Sources/Contacts/ContactsService.swift @@ -0,0 +1,212 @@ +import Contacts +import Foundation +import OpenClawKit + +final class ContactsService: ContactsServicing { + private static var payloadKeys: [CNKeyDescriptor] { + [ + CNContactIdentifierKey as CNKeyDescriptor, + CNContactGivenNameKey as CNKeyDescriptor, + CNContactFamilyNameKey as CNKeyDescriptor, + CNContactOrganizationNameKey as CNKeyDescriptor, + CNContactPhoneNumbersKey as CNKeyDescriptor, + CNContactEmailAddressesKey as CNKeyDescriptor, + ] + } + + func search(params: OpenClawContactsSearchParams) async throws -> OpenClawContactsSearchPayload { + let store = CNContactStore() + let status = CNContactStore.authorizationStatus(for: .contacts) + let authorized = await Self.ensureAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Contacts", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "CONTACTS_PERMISSION_REQUIRED: grant Contacts permission", + ]) + } + + let limit = max(1, min(params.limit ?? 
25, 200)) + + var contacts: [CNContact] = [] + if let query = params.query?.trimmingCharacters(in: .whitespacesAndNewlines), !query.isEmpty { + let predicate = CNContact.predicateForContacts(matchingName: query) + contacts = try store.unifiedContacts(matching: predicate, keysToFetch: Self.payloadKeys) + } else { + let request = CNContactFetchRequest(keysToFetch: Self.payloadKeys) + try store.enumerateContacts(with: request) { contact, stop in + contacts.append(contact) + if contacts.count >= limit { + stop.pointee = true + } + } + } + + let sliced = Array(contacts.prefix(limit)) + let payload = sliced.map { Self.payload(from: $0) } + + return OpenClawContactsSearchPayload(contacts: payload) + } + + func add(params: OpenClawContactsAddParams) async throws -> OpenClawContactsAddPayload { + let store = CNContactStore() + let status = CNContactStore.authorizationStatus(for: .contacts) + let authorized = await Self.ensureAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Contacts", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "CONTACTS_PERMISSION_REQUIRED: grant Contacts permission", + ]) + } + + let givenName = params.givenName?.trimmingCharacters(in: .whitespacesAndNewlines) + let familyName = params.familyName?.trimmingCharacters(in: .whitespacesAndNewlines) + let organizationName = params.organizationName?.trimmingCharacters(in: .whitespacesAndNewlines) + let displayName = params.displayName?.trimmingCharacters(in: .whitespacesAndNewlines) + let phoneNumbers = Self.normalizeStrings(params.phoneNumbers) + let emails = Self.normalizeStrings(params.emails, lowercased: true) + + let hasName = !(givenName ?? "").isEmpty || !(familyName ?? "").isEmpty || !(displayName ?? "").isEmpty + let hasOrg = !(organizationName ?? 
"").isEmpty + let hasDetails = !phoneNumbers.isEmpty || !emails.isEmpty + guard hasName || hasOrg || hasDetails else { + throw NSError(domain: "Contacts", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "CONTACTS_INVALID: include a name, organization, phone, or email", + ]) + } + + if !phoneNumbers.isEmpty || !emails.isEmpty { + if let existing = try Self.findExistingContact( + store: store, + phoneNumbers: phoneNumbers, + emails: emails) + { + return OpenClawContactsAddPayload(contact: Self.payload(from: existing)) + } + } + + let contact = CNMutableContact() + contact.givenName = givenName ?? "" + contact.familyName = familyName ?? "" + contact.organizationName = organizationName ?? "" + if contact.givenName.isEmpty && contact.familyName.isEmpty, let displayName { + contact.givenName = displayName + } + contact.phoneNumbers = phoneNumbers.map { + CNLabeledValue(label: CNLabelPhoneNumberMobile, value: CNPhoneNumber(stringValue: $0)) + } + contact.emailAddresses = emails.map { + CNLabeledValue(label: CNLabelHome, value: $0 as NSString) + } + + let save = CNSaveRequest() + save.add(contact, toContainerWithIdentifier: nil) + try store.execute(save) + + let persisted: CNContact + if !contact.identifier.isEmpty { + persisted = try store.unifiedContact( + withIdentifier: contact.identifier, + keysToFetch: Self.payloadKeys) + } else { + persisted = contact + } + + return OpenClawContactsAddPayload(contact: Self.payload(from: persisted)) + } + + private static func ensureAuthorization(store: CNContactStore, status: CNAuthorizationStatus) async -> Bool { + switch status { + case .authorized, .limited: + return true + case .notDetermined: + // Don’t prompt during node.invoke; the caller should instruct the user to grant permission. + // Prompts block the invoke and lead to timeouts in headless flows. 
+ return false + case .restricted, .denied: + return false + @unknown default: + return false + } + } + + private static func normalizeStrings(_ values: [String]?, lowercased: Bool = false) -> [String] { + (values ?? []) + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .map { lowercased ? $0.lowercased() : $0 } + } + + private static func findExistingContact( + store: CNContactStore, + phoneNumbers: [String], + emails: [String]) throws -> CNContact? + { + if phoneNumbers.isEmpty && emails.isEmpty { + return nil + } + + var matches: [CNContact] = [] + + for phone in phoneNumbers { + let predicate = CNContact.predicateForContacts(matching: CNPhoneNumber(stringValue: phone)) + let contacts = try store.unifiedContacts(matching: predicate, keysToFetch: Self.payloadKeys) + matches.append(contentsOf: contacts) + } + + for email in emails { + let predicate = CNContact.predicateForContacts(matchingEmailAddress: email) + let contacts = try store.unifiedContacts(matching: predicate, keysToFetch: Self.payloadKeys) + matches.append(contentsOf: contacts) + } + + return Self.matchContacts(contacts: matches, phoneNumbers: phoneNumbers, emails: emails) + } + + private static func matchContacts( + contacts: [CNContact], + phoneNumbers: [String], + emails: [String]) -> CNContact? 
+ { + let normalizedPhones = Set(phoneNumbers.map { normalizePhone($0) }.filter { !$0.isEmpty }) + let normalizedEmails = Set(emails.map { $0.lowercased() }.filter { !$0.isEmpty }) + var seen = Set() + + for contact in contacts { + guard seen.insert(contact.identifier).inserted else { continue } + let contactPhones = Set(contact.phoneNumbers.map { normalizePhone($0.value.stringValue) }) + let contactEmails = Set(contact.emailAddresses.map { String($0.value).lowercased() }) + + if !normalizedPhones.isEmpty, !contactPhones.isDisjoint(with: normalizedPhones) { + return contact + } + if !normalizedEmails.isEmpty, !contactEmails.isDisjoint(with: normalizedEmails) { + return contact + } + } + + return nil + } + + private static func normalizePhone(_ phone: String) -> String { + let trimmed = phone.trimmingCharacters(in: .whitespacesAndNewlines) + let digits = trimmed.unicodeScalars.filter { CharacterSet.decimalDigits.contains($0) } + let normalized = String(String.UnicodeScalarView(digits)) + return normalized.isEmpty ? trimmed : normalized + } + + private static func payload(from contact: CNContact) -> OpenClawContactPayload { + OpenClawContactPayload( + identifier: contact.identifier, + displayName: CNContactFormatter.string(from: contact, style: .fullName) + ?? 
"\(contact.givenName) \(contact.familyName)".trimmingCharacters(in: .whitespacesAndNewlines), + givenName: contact.givenName, + familyName: contact.familyName, + organizationName: contact.organizationName, + phoneNumbers: contact.phoneNumbers.map { $0.value.stringValue }, + emails: contact.emailAddresses.map { String($0.value) }) + } + +#if DEBUG + static func _test_matches(contact: CNContact, phoneNumbers: [String], emails: [String]) -> Bool { + matchContacts(contacts: [contact], phoneNumbers: phoneNumbers, emails: emails) != nil + } +#endif +} diff --git a/apps/ios/Sources/Device/DeviceStatusService.swift b/apps/ios/Sources/Device/DeviceStatusService.swift new file mode 100644 index 0000000000..fed2716b5b --- /dev/null +++ b/apps/ios/Sources/Device/DeviceStatusService.swift @@ -0,0 +1,87 @@ +import Foundation +import OpenClawKit +import UIKit + +final class DeviceStatusService: DeviceStatusServicing { + private let networkStatus: NetworkStatusService + + init(networkStatus: NetworkStatusService = NetworkStatusService()) { + self.networkStatus = networkStatus + } + + func status() async throws -> OpenClawDeviceStatusPayload { + let battery = self.batteryStatus() + let thermal = self.thermalStatus() + let storage = self.storageStatus() + let network = await self.networkStatus.currentStatus() + let uptime = ProcessInfo.processInfo.systemUptime + + return OpenClawDeviceStatusPayload( + battery: battery, + thermal: thermal, + storage: storage, + network: network, + uptimeSeconds: uptime) + } + + func info() -> OpenClawDeviceInfoPayload { + let device = UIDevice.current + let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String ?? "dev" + let appBuild = Bundle.main.infoDictionary?["CFBundleVersion"] as? String ?? "0" + let locale = Locale.preferredLanguages.first ?? 
Locale.current.identifier + return OpenClawDeviceInfoPayload( + deviceName: device.name, + modelIdentifier: Self.modelIdentifier(), + systemName: device.systemName, + systemVersion: device.systemVersion, + appVersion: appVersion, + appBuild: appBuild, + locale: locale) + } + + private func batteryStatus() -> OpenClawBatteryStatusPayload { + let device = UIDevice.current + device.isBatteryMonitoringEnabled = true + let level = device.batteryLevel >= 0 ? Double(device.batteryLevel) : nil + let state: OpenClawBatteryState = switch device.batteryState { + case .charging: .charging + case .full: .full + case .unplugged: .unplugged + case .unknown: .unknown + @unknown default: .unknown + } + return OpenClawBatteryStatusPayload( + level: level, + state: state, + lowPowerModeEnabled: ProcessInfo.processInfo.isLowPowerModeEnabled) + } + + private func thermalStatus() -> OpenClawThermalStatusPayload { + let state: OpenClawThermalState = switch ProcessInfo.processInfo.thermalState { + case .nominal: .nominal + case .fair: .fair + case .serious: .serious + case .critical: .critical + @unknown default: .nominal + } + return OpenClawThermalStatusPayload(state: state) + } + + private func storageStatus() -> OpenClawStorageStatusPayload { + let attrs = (try? FileManager.default.attributesOfFileSystem(forPath: NSHomeDirectory())) ?? [:] + let total = (attrs[.systemSize] as? NSNumber)?.int64Value ?? 0 + let free = (attrs[.systemFreeSize] as? NSNumber)?.int64Value ?? 0 + let used = max(0, total - free) + return OpenClawStorageStatusPayload(totalBytes: total, freeBytes: free, usedBytes: used) + } + + private static func modelIdentifier() -> String { + var systemInfo = utsname() + uname(&systemInfo) + let machine = withUnsafeBytes(of: &systemInfo.machine) { ptr in + String(bytes: ptr.prefix { $0 != 0 }, encoding: .utf8) + } + let trimmed = machine?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + return trimmed.isEmpty ? 
"unknown" : trimmed + } +} diff --git a/apps/ios/Sources/Device/NetworkStatusService.swift b/apps/ios/Sources/Device/NetworkStatusService.swift new file mode 100644 index 0000000000..7d92d1cc1c --- /dev/null +++ b/apps/ios/Sources/Device/NetworkStatusService.swift @@ -0,0 +1,69 @@ +import Foundation +import Network +import OpenClawKit + +final class NetworkStatusService: @unchecked Sendable { + func currentStatus(timeoutMs: Int = 1500) async -> OpenClawNetworkStatusPayload { + await withCheckedContinuation { cont in + let monitor = NWPathMonitor() + let queue = DispatchQueue(label: "bot.molt.ios.network-status") + let state = NetworkStatusState() + + monitor.pathUpdateHandler = { path in + guard state.markCompleted() else { return } + monitor.cancel() + cont.resume(returning: Self.payload(from: path)) + } + + monitor.start(queue: queue) + + queue.asyncAfter(deadline: .now() + .milliseconds(timeoutMs)) { + guard state.markCompleted() else { return } + monitor.cancel() + cont.resume(returning: Self.fallbackPayload()) + } + } + } + + private static func payload(from path: NWPath) -> OpenClawNetworkStatusPayload { + let status: OpenClawNetworkPathStatus = switch path.status { + case .satisfied: .satisfied + case .requiresConnection: .requiresConnection + case .unsatisfied: .unsatisfied + @unknown default: .unsatisfied + } + + var interfaces: [OpenClawNetworkInterfaceType] = [] + if path.usesInterfaceType(.wifi) { interfaces.append(.wifi) } + if path.usesInterfaceType(.cellular) { interfaces.append(.cellular) } + if path.usesInterfaceType(.wiredEthernet) { interfaces.append(.wired) } + if interfaces.isEmpty { interfaces.append(.other) } + + return OpenClawNetworkStatusPayload( + status: status, + isExpensive: path.isExpensive, + isConstrained: path.isConstrained, + interfaces: interfaces) + } + + private static func fallbackPayload() -> OpenClawNetworkStatusPayload { + OpenClawNetworkStatusPayload( + status: .unsatisfied, + isExpensive: false, + isConstrained: false, + 
interfaces: [.other]) + } +} + +private final class NetworkStatusState: @unchecked Sendable { + private let lock = NSLock() + private var completed = false + + func markCompleted() -> Bool { + self.lock.lock() + defer { self.lock.unlock() } + if self.completed { return false } + self.completed = true + return true + } +} diff --git a/apps/ios/Sources/Device/NodeDisplayName.swift b/apps/ios/Sources/Device/NodeDisplayName.swift new file mode 100644 index 0000000000..9ddf38b24a --- /dev/null +++ b/apps/ios/Sources/Device/NodeDisplayName.swift @@ -0,0 +1,48 @@ +import Foundation +import UIKit + +enum NodeDisplayName { + private static let genericNames: Set = ["iOS Node", "iPhone Node", "iPad Node"] + + static func isGeneric(_ name: String) -> Bool { + Self.genericNames.contains(name) + } + + static func defaultValue(for interfaceIdiom: UIUserInterfaceIdiom) -> String { + switch interfaceIdiom { + case .phone: + return "iPhone Node" + case .pad: + return "iPad Node" + default: + return "iOS Node" + } + } + + static func resolve( + existing: String?, + deviceName: String, + interfaceIdiom: UIUserInterfaceIdiom + ) -> String { + let trimmedExisting = existing?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if !trimmedExisting.isEmpty, !Self.isGeneric(trimmedExisting) { + return trimmedExisting + } + + let trimmedDevice = deviceName.trimmingCharacters(in: .whitespacesAndNewlines) + if let normalized = Self.normalizedDeviceName(trimmedDevice) { + return normalized + } + + return Self.defaultValue(for: interfaceIdiom) + } + + private static func normalizedDeviceName(_ deviceName: String) -> String? 
{ + guard !deviceName.isEmpty else { return nil } + let lower = deviceName.lowercased() + if lower.contains("iphone") || lower.contains("ipad") || lower.contains("ios") { + return deviceName + } + return nil + } +} diff --git a/apps/ios/Sources/Gateway/GatewayConnectConfig.swift b/apps/ios/Sources/Gateway/GatewayConnectConfig.swift new file mode 100644 index 0000000000..7f4e93380b --- /dev/null +++ b/apps/ios/Sources/Gateway/GatewayConnectConfig.swift @@ -0,0 +1,27 @@ +import Foundation +import OpenClawKit + +/// Single source of truth for "how we connect" to the current gateway. +/// +/// The iOS app maintains two WebSocket sessions to the same gateway: +/// - a `role=node` session for device capabilities (`node.invoke.*`) +/// - a `role=operator` session for chat/talk/config (`chat.*`, `talk.*`, etc.) +/// +/// Both sessions should derive all connection inputs from this config so we +/// don't accidentally persist gateway-scoped state under different keys. +struct GatewayConnectConfig: Sendable { + let url: URL + let stableID: String + let tls: GatewayTLSParams? + let token: String? + let password: String? + let nodeOptions: GatewayConnectOptions + + /// Stable, non-empty identifier used for gateway-scoped persistence keys. + /// If the caller doesn't provide a stableID, fall back to URL identity. 
+ var effectiveStableID: String { + let trimmed = self.stableID.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty { return self.url.absoluteString } + return trimmed + } +} diff --git a/apps/ios/Sources/Gateway/GatewayConnectionController.swift b/apps/ios/Sources/Gateway/GatewayConnectionController.swift index 65d099c010..34af7f1dc0 100644 --- a/apps/ios/Sources/Gateway/GatewayConnectionController.swift +++ b/apps/ios/Sources/Gateway/GatewayConnectionController.swift @@ -1,8 +1,15 @@ -import OpenClawKit -import Darwin +import AVFoundation +import Contacts +import CoreLocation +import CoreMotion +import EventKit import Foundation +import OpenClawKit import Network import Observation +import Photos +import ReplayKit +import Speech import SwiftUI import UIKit @@ -42,8 +49,10 @@ final class GatewayConnectionController { self.discovery.stop() case .active, .inactive: self.discovery.start() + self.attemptAutoReconnectIfNeeded() @unknown default: self.discovery.start() + self.attemptAutoReconnectIfNeeded() } } @@ -60,6 +69,11 @@ final class GatewayConnectionController { port: port, useTLS: tlsParams?.required == true) else { return } + GatewaySettingsStore.saveLastGatewayConnection( + host: host, + port: port, + useTLS: tlsParams?.required == true, + stableID: gateway.stableID) self.didAutoConnect = true self.startAutoConnect( url: url, @@ -74,13 +88,24 @@ final class GatewayConnectionController { .trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" let token = GatewaySettingsStore.loadGatewayToken(instanceId: instanceId) let password = GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId) - let stableID = self.manualStableID(host: host, port: port) - let tlsParams = self.resolveManualTLSParams(stableID: stableID, tlsEnabled: useTLS) + let resolvedUseTLS = useTLS + guard let resolvedPort = self.resolveManualPort(host: host, port: port, useTLS: resolvedUseTLS) + else { return } + let stableID = self.manualStableID(host: host, port: resolvedPort) + let tlsParams = self.resolveManualTLSParams( + stableID: stableID, + tlsEnabled: resolvedUseTLS, + allowTOFUReset: self.shouldForceTLS(host: host)) guard let url = self.buildGatewayURL( host: host, - port: port, + port: resolvedPort, useTLS: tlsParams?.required == true) else { return } + GatewaySettingsStore.saveLastGatewayConnection( + host: host, + port: resolvedPort, + useTLS: tlsParams?.required == true, + stableID: stableID) self.didAutoConnect = true self.startAutoConnect( url: url, @@ -90,6 +115,38 @@ final class GatewayConnectionController { password: password) } + func connectLastKnown() async { + guard let last = GatewaySettingsStore.loadLastGatewayConnection() else { return } + let instanceId = UserDefaults.standard.string(forKey: "node.instanceId")? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" + let token = GatewaySettingsStore.loadGatewayToken(instanceId: instanceId) + let password = GatewaySettingsStore.loadGatewayPassword(instanceId: instanceId) + let resolvedUseTLS = last.useTLS + let tlsParams = self.resolveManualTLSParams( + stableID: last.stableID, + tlsEnabled: resolvedUseTLS, + allowTOFUReset: self.shouldForceTLS(host: last.host)) + guard let url = self.buildGatewayURL( + host: last.host, + port: last.port, + useTLS: tlsParams?.required == true) + else { return } + if resolvedUseTLS != last.useTLS { + GatewaySettingsStore.saveLastGatewayConnection( + host: last.host, + port: last.port, + useTLS: resolvedUseTLS, + stableID: last.stableID) + } + self.didAutoConnect = true + self.startAutoConnect( + url: url, + gatewayStableID: last.stableID, + tls: tlsParams, + token: token, + password: password) + } + private func updateFromDiscovery() { let newGateways = self.discovery.gateways self.gateways = newGateways @@ -119,6 +176,7 @@ final class GatewayConnectionController { guard appModel.gatewayServerName == nil else { return } let defaults = UserDefaults.standard + guard defaults.bool(forKey: "gateway.autoconnect") else { return } let manualEnabled = defaults.bool(forKey: "gateway.manual.enabled") let instanceId = defaults.string(forKey: "node.instanceId")? @@ -134,11 +192,19 @@ final class GatewayConnectionController { guard !manualHost.isEmpty else { return } let manualPort = defaults.integer(forKey: "gateway.manual.port") - let resolvedPort = manualPort > 0 ? 
manualPort : 18789 let manualTLS = defaults.bool(forKey: "gateway.manual.tls") + let resolvedUseTLS = manualTLS || self.shouldForceTLS(host: manualHost) + guard let resolvedPort = self.resolveManualPort( + host: manualHost, + port: manualPort, + useTLS: resolvedUseTLS) + else { return } let stableID = self.manualStableID(host: manualHost, port: resolvedPort) - let tlsParams = self.resolveManualTLSParams(stableID: stableID, tlsEnabled: manualTLS) + let tlsParams = self.resolveManualTLSParams( + stableID: stableID, + tlsEnabled: resolvedUseTLS, + allowTOFUReset: self.shouldForceTLS(host: manualHost)) guard let url = self.buildGatewayURL( host: manualHost, @@ -156,30 +222,80 @@ final class GatewayConnectionController { return } + if let lastKnown = GatewaySettingsStore.loadLastGatewayConnection() { + let resolvedUseTLS = lastKnown.useTLS || self.shouldForceTLS(host: lastKnown.host) + let tlsParams = self.resolveManualTLSParams( + stableID: lastKnown.stableID, + tlsEnabled: resolvedUseTLS, + allowTOFUReset: self.shouldForceTLS(host: lastKnown.host)) + guard let url = self.buildGatewayURL( + host: lastKnown.host, + port: lastKnown.port, + useTLS: tlsParams?.required == true) + else { return } + + self.didAutoConnect = true + self.startAutoConnect( + url: url, + gatewayStableID: lastKnown.stableID, + tls: tlsParams, + token: token, + password: password) + return + } + let preferredStableID = defaults.string(forKey: "gateway.preferredStableID")? .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" let lastDiscoveredStableID = defaults.string(forKey: "gateway.lastDiscoveredStableID")? .trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" let candidates = [preferredStableID, lastDiscoveredStableID].filter { !$0.isEmpty } - guard let targetStableID = candidates.first(where: { id in + if let targetStableID = candidates.first(where: { id in self.gateways.contains(where: { $0.stableID == id }) - }) else { return } + }) { + guard let target = self.gateways.first(where: { $0.stableID == targetStableID }) else { return } + guard let host = self.resolveGatewayHost(target) else { return } + let port = target.gatewayPort ?? 18789 + let tlsParams = self.resolveDiscoveredTLSParams(gateway: target) + guard let url = self.buildGatewayURL(host: host, port: port, useTLS: tlsParams?.required == true) + else { return } - guard let target = self.gateways.first(where: { $0.stableID == targetStableID }) else { return } - guard let host = self.resolveGatewayHost(target) else { return } - let port = target.gatewayPort ?? 18789 - let tlsParams = self.resolveDiscoveredTLSParams(gateway: target) - guard let url = self.buildGatewayURL(host: host, port: port, useTLS: tlsParams?.required == true) - else { return } + self.didAutoConnect = true + self.startAutoConnect( + url: url, + gatewayStableID: target.stableID, + tls: tlsParams, + token: token, + password: password) + return + } - self.didAutoConnect = true - self.startAutoConnect( - url: url, - gatewayStableID: target.stableID, - tls: tlsParams, - token: token, - password: password) + if self.gateways.count == 1, let gateway = self.gateways.first { + guard let host = self.resolveGatewayHost(gateway) else { return } + let port = gateway.gatewayPort ?? 
18789 + let tlsParams = self.resolveDiscoveredTLSParams(gateway: gateway) + guard let url = self.buildGatewayURL(host: host, port: port, useTLS: tlsParams?.required == true) + else { return } + + self.didAutoConnect = true + self.startAutoConnect( + url: url, + gatewayStableID: gateway.stableID, + tls: tlsParams, + token: token, + password: password) + return + } + } + + private func attemptAutoReconnectIfNeeded() { + guard let appModel = self.appModel else { return } + guard appModel.gatewayAutoReconnectEnabled else { return } + // Avoid starting duplicate connect loops while a prior config is active. + guard appModel.activeGatewayConnectConfig == nil else { return } + guard UserDefaults.standard.bool(forKey: "gateway.autoconnect") else { return } + self.didAutoConnect = false + self.maybeAutoConnect() } private func updateLastDiscoveredGateway(from gateways: [GatewayDiscoveryModel.DiscoveredGateway]) { @@ -205,20 +321,21 @@ final class GatewayConnectionController { password: String?) { guard let appModel else { return } - let connectOptions = self.makeConnectOptions() + let connectOptions = self.makeConnectOptions(stableID: gatewayStableID) - Task { [weak self] in - guard let self else { return } + Task { [weak appModel] in + guard let appModel else { return } await MainActor.run { appModel.gatewayStatusText = "Connecting…" } - appModel.connectToGateway( + let cfg = GatewayConnectConfig( url: url, - gatewayStableID: gatewayStableID, + stableID: gatewayStableID, tls: tls, token: token, password: password, - connectOptions: connectOptions) + nodeOptions: connectOptions) + appModel.applyGatewayConnectConfig(cfg) } } @@ -237,13 +354,17 @@ final class GatewayConnectionController { return nil } - private func resolveManualTLSParams(stableID: String, tlsEnabled: Bool) -> GatewayTLSParams? { + private func resolveManualTLSParams( + stableID: String, + tlsEnabled: Bool, + allowTOFUReset: Bool = false) -> GatewayTLSParams? 
+ { let stored = GatewayTLSStore.loadFingerprint(stableID: stableID) if tlsEnabled || stored != nil { return GatewayTLSParams( required: true, expectedFingerprint: stored, - allowTOFU: stored == nil, + allowTOFU: stored == nil || allowTOFUReset, storeKey: stableID) } @@ -251,12 +372,12 @@ final class GatewayConnectionController { } private func resolveGatewayHost(_ gateway: GatewayDiscoveryModel.DiscoveredGateway) -> String? { - if let lanHost = gateway.lanHost?.trimmingCharacters(in: .whitespacesAndNewlines), !lanHost.isEmpty { - return lanHost - } if let tailnet = gateway.tailnetDns?.trimmingCharacters(in: .whitespacesAndNewlines), !tailnet.isEmpty { return tailnet } + if let lanHost = gateway.lanHost?.trimmingCharacters(in: .whitespacesAndNewlines), !lanHost.isEmpty { + return lanHost + } return nil } @@ -269,38 +390,69 @@ final class GatewayConnectionController { return components.url } + private func shouldForceTLS(host: String) -> Bool { + let trimmed = host.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if trimmed.isEmpty { return false } + return trimmed.hasSuffix(".ts.net") || trimmed.hasSuffix(".ts.net.") + } + private func manualStableID(host: String, port: Int) -> String { "manual|\(host.lowercased())|\(port)" } - private func makeConnectOptions() -> GatewayConnectOptions { + private func makeConnectOptions(stableID: String?) -> GatewayConnectOptions { let defaults = UserDefaults.standard let displayName = self.resolvedDisplayName(defaults: defaults) + let resolvedClientId = self.resolvedClientId(defaults: defaults, stableID: stableID) return GatewayConnectOptions( role: "node", scopes: [], caps: self.currentCaps(), commands: self.currentCommands(), - permissions: [:], - clientId: "openclaw-ios", + permissions: self.currentPermissions(), + clientId: resolvedClientId, clientMode: "node", clientDisplayName: displayName) } + private func resolvedClientId(defaults: UserDefaults, stableID: String?) 
-> String { + if let stableID, + let override = GatewaySettingsStore.loadGatewayClientIdOverride(stableID: stableID) { + return override + } + let manualClientId = defaults.string(forKey: "gateway.manual.clientId")? + .trimmingCharacters(in: .whitespacesAndNewlines) + if manualClientId?.isEmpty == false { + return manualClientId! + } + return "openclaw-ios" + } + + private func resolveManualPort(host: String, port: Int, useTLS: Bool) -> Int? { + if port > 0 { + return port <= 65535 ? port : nil + } + let trimmedHost = host.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedHost.isEmpty else { return nil } + if useTLS && self.shouldForceTLS(host: trimmedHost) { + return 443 + } + return 18789 + } + private func resolvedDisplayName(defaults: UserDefaults) -> String { let key = "node.displayName" - let existing = defaults.string(forKey: key)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - if !existing.isEmpty, existing != "iOS Node" { return existing } - - let deviceName = UIDevice.current.name.trimmingCharacters(in: .whitespacesAndNewlines) - let candidate = deviceName.isEmpty ? "iOS Node" : deviceName - - if existing.isEmpty || existing == "iOS Node" { - defaults.set(candidate, forKey: key) + let existingRaw = defaults.string(forKey: key) + let resolved = NodeDisplayName.resolve( + existing: existingRaw, + deviceName: UIDevice.current.name, + interfaceIdiom: UIDevice.current.userInterfaceIdiom) + let existing = existingRaw?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if existing.isEmpty || NodeDisplayName.isGeneric(existing) { + defaults.set(resolved, forKey: key) } - - return candidate + return resolved } private func currentCaps() -> [String] { @@ -320,6 +472,15 @@ final class GatewayConnectionController { let locationMode = OpenClawLocationMode(rawValue: locationModeRaw) ?? 
.off if locationMode != .off { caps.append(OpenClawCapability.location.rawValue) } + caps.append(OpenClawCapability.device.rawValue) + caps.append(OpenClawCapability.photos.rawValue) + caps.append(OpenClawCapability.contacts.rawValue) + caps.append(OpenClawCapability.calendar.rawValue) + caps.append(OpenClawCapability.reminders.rawValue) + if Self.motionAvailable() { + caps.append(OpenClawCapability.motion.rawValue) + } + return caps } @@ -335,10 +496,11 @@ final class GatewayConnectionController { OpenClawCanvasA2UICommand.reset.rawValue, OpenClawScreenCommand.record.rawValue, OpenClawSystemCommand.notify.rawValue, - OpenClawSystemCommand.which.rawValue, - OpenClawSystemCommand.run.rawValue, - OpenClawSystemCommand.execApprovalsGet.rawValue, - OpenClawSystemCommand.execApprovalsSet.rawValue, + OpenClawChatCommand.push.rawValue, + OpenClawTalkCommand.pttStart.rawValue, + OpenClawTalkCommand.pttStop.rawValue, + OpenClawTalkCommand.pttCancel.rawValue, + OpenClawTalkCommand.pttOnce.rawValue, ] let caps = Set(self.currentCaps()) @@ -350,10 +512,76 @@ final class GatewayConnectionController { if caps.contains(OpenClawCapability.location.rawValue) { commands.append(OpenClawLocationCommand.get.rawValue) } + if caps.contains(OpenClawCapability.device.rawValue) { + commands.append(OpenClawDeviceCommand.status.rawValue) + commands.append(OpenClawDeviceCommand.info.rawValue) + } + if caps.contains(OpenClawCapability.photos.rawValue) { + commands.append(OpenClawPhotosCommand.latest.rawValue) + } + if caps.contains(OpenClawCapability.contacts.rawValue) { + commands.append(OpenClawContactsCommand.search.rawValue) + commands.append(OpenClawContactsCommand.add.rawValue) + } + if caps.contains(OpenClawCapability.calendar.rawValue) { + commands.append(OpenClawCalendarCommand.events.rawValue) + commands.append(OpenClawCalendarCommand.add.rawValue) + } + if caps.contains(OpenClawCapability.reminders.rawValue) { + commands.append(OpenClawRemindersCommand.list.rawValue) + 
commands.append(OpenClawRemindersCommand.add.rawValue) + } + if caps.contains(OpenClawCapability.motion.rawValue) { + commands.append(OpenClawMotionCommand.activity.rawValue) + commands.append(OpenClawMotionCommand.pedometer.rawValue) + } return commands } + private func currentPermissions() -> [String: Bool] { + var permissions: [String: Bool] = [:] + permissions["camera"] = AVCaptureDevice.authorizationStatus(for: .video) == .authorized + permissions["microphone"] = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized + permissions["speechRecognition"] = SFSpeechRecognizer.authorizationStatus() == .authorized + permissions["location"] = Self.isLocationAuthorized( + status: CLLocationManager().authorizationStatus) + && CLLocationManager.locationServicesEnabled() + permissions["screenRecording"] = RPScreenRecorder.shared().isAvailable + + let photoStatus = PHPhotoLibrary.authorizationStatus(for: .readWrite) + permissions["photos"] = photoStatus == .authorized || photoStatus == .limited + let contactsStatus = CNContactStore.authorizationStatus(for: .contacts) + permissions["contacts"] = contactsStatus == .authorized || contactsStatus == .limited + + let calendarStatus = EKEventStore.authorizationStatus(for: .event) + permissions["calendar"] = + calendarStatus == .authorized || calendarStatus == .fullAccess || calendarStatus == .writeOnly + let remindersStatus = EKEventStore.authorizationStatus(for: .reminder) + permissions["reminders"] = + remindersStatus == .authorized || remindersStatus == .fullAccess || remindersStatus == .writeOnly + + let motionStatus = CMMotionActivityManager.authorizationStatus() + let pedometerStatus = CMPedometer.authorizationStatus() + permissions["motion"] = + motionStatus == .authorized || pedometerStatus == .authorized + + return permissions + } + + private static func isLocationAuthorized(status: CLAuthorizationStatus) -> Bool { + switch status { + case .authorizedAlways, .authorizedWhenInUse, .authorized: + return true + 
default: + return false + } + } + + private static func motionAvailable() -> Bool { + CMMotionActivityManager.isActivityAvailable() || CMPedometer.isStepCountingAvailable() + } + private func platformString() -> String { let v = ProcessInfo.processInfo.operatingSystemVersion let name = switch UIDevice.current.userInterfaceIdiom { @@ -407,6 +635,10 @@ extension GatewayConnectionController { self.currentCommands() } + func _test_currentPermissions() -> [String: Bool] { + self.currentPermissions() + } + func _test_platformString() -> String { self.platformString() } diff --git a/apps/ios/Sources/Gateway/GatewayHealthMonitor.swift b/apps/ios/Sources/Gateway/GatewayHealthMonitor.swift new file mode 100644 index 0000000000..182df942c9 --- /dev/null +++ b/apps/ios/Sources/Gateway/GatewayHealthMonitor.swift @@ -0,0 +1,85 @@ +import Foundation +import OpenClawKit + +@MainActor +final class GatewayHealthMonitor { + struct Config: Sendable { + var intervalSeconds: Double + var timeoutSeconds: Double + var maxFailures: Int + } + + private let config: Config + private let sleep: @Sendable (UInt64) async -> Void + private var task: Task? + + init( + config: Config = Config(intervalSeconds: 15, timeoutSeconds: 5, maxFailures: 3), + sleep: @escaping @Sendable (UInt64) async -> Void = { nanoseconds in + try? 
await Task.sleep(nanoseconds: nanoseconds) + } + ) { + self.config = config + self.sleep = sleep + } + + func start( + check: @escaping @Sendable () async throws -> Bool, + onFailure: @escaping @Sendable (_ failureCount: Int) async -> Void) + { + self.stop() + let config = self.config + let sleep = self.sleep + self.task = Task { @MainActor in + var failures = 0 + while !Task.isCancelled { + let ok = await Self.runCheck(check: check, timeoutSeconds: config.timeoutSeconds) + if ok { + failures = 0 + } else { + failures += 1 + if failures >= max(1, config.maxFailures) { + await onFailure(failures) + failures = 0 + } + } + + if Task.isCancelled { break } + let interval = max(0.0, config.intervalSeconds) + let nanos = UInt64(interval * 1_000_000_000) + if nanos > 0 { + await sleep(nanos) + } else { + await Task.yield() + } + } + } + } + + func stop() { + self.task?.cancel() + self.task = nil + } + + private static func runCheck( + check: @escaping @Sendable () async throws -> Bool, + timeoutSeconds: Double) async -> Bool + { + let timeout = max(0.0, timeoutSeconds) + if timeout == 0 { + return (try? await check()) ?? 
false + } + do { + let timeoutError = NSError( + domain: "GatewayHealthMonitor", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "health check timed out"]) + return try await AsyncTimeout.withTimeout( + seconds: timeout, + onTimeout: { timeoutError }, + operation: check) + } catch { + return false + } + } +} diff --git a/apps/ios/Sources/Gateway/GatewaySettingsStore.swift b/apps/ios/Sources/Gateway/GatewaySettingsStore.swift index 4560dab788..d227386523 100644 --- a/apps/ios/Sources/Gateway/GatewaySettingsStore.swift +++ b/apps/ios/Sources/Gateway/GatewaySettingsStore.swift @@ -1,4 +1,5 @@ import Foundation +import os enum GatewaySettingsStore { private static let gatewayService = "ai.openclaw.gateway" @@ -12,6 +13,12 @@ enum GatewaySettingsStore { private static let manualPortDefaultsKey = "gateway.manual.port" private static let manualTlsDefaultsKey = "gateway.manual.tls" private static let discoveryDebugLogsDefaultsKey = "gateway.discovery.debugLogs" + private static let lastGatewayHostDefaultsKey = "gateway.last.host" + private static let lastGatewayPortDefaultsKey = "gateway.last.port" + private static let lastGatewayTlsDefaultsKey = "gateway.last.tls" + private static let lastGatewayStableIDDefaultsKey = "gateway.last.stableID" + private static let clientIdOverrideDefaultsPrefix = "gateway.clientIdOverride." + private static let selectedAgentDefaultsPrefix = "gateway.selectedAgentId." 
private static let instanceIdAccount = "instanceId" private static let preferredGatewayStableIDAccount = "preferredStableID" @@ -107,6 +114,71 @@ enum GatewaySettingsStore { account: self.gatewayPasswordAccount(instanceId: instanceId)) } + static func saveLastGatewayConnection(host: String, port: Int, useTLS: Bool, stableID: String) { + let defaults = UserDefaults.standard + defaults.set(host, forKey: self.lastGatewayHostDefaultsKey) + defaults.set(port, forKey: self.lastGatewayPortDefaultsKey) + defaults.set(useTLS, forKey: self.lastGatewayTlsDefaultsKey) + defaults.set(stableID, forKey: self.lastGatewayStableIDDefaultsKey) + } + + static func loadLastGatewayConnection() -> (host: String, port: Int, useTLS: Bool, stableID: String)? { + let defaults = UserDefaults.standard + let host = defaults.string(forKey: self.lastGatewayHostDefaultsKey)? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let port = defaults.integer(forKey: self.lastGatewayPortDefaultsKey) + let useTLS = defaults.bool(forKey: self.lastGatewayTlsDefaultsKey) + let stableID = defaults.string(forKey: self.lastGatewayStableIDDefaultsKey)? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + + guard !host.isEmpty, port > 0, port <= 65535, !stableID.isEmpty else { return nil } + return (host: host, port: port, useTLS: useTLS, stableID: stableID) + } + + static func loadGatewayClientIdOverride(stableID: String) -> String? { + let trimmedID = stableID.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedID.isEmpty else { return nil } + let key = self.clientIdOverrideDefaultsPrefix + trimmedID + let value = UserDefaults.standard.string(forKey: key)? + .trimmingCharacters(in: .whitespacesAndNewlines) + if value?.isEmpty == false { return value } + return nil + } + + static func saveGatewayClientIdOverride(stableID: String, clientId: String?) 
{ + let trimmedID = stableID.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedID.isEmpty else { return } + let key = self.clientIdOverrideDefaultsPrefix + trimmedID + let trimmedClientId = clientId?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if trimmedClientId.isEmpty { + UserDefaults.standard.removeObject(forKey: key) + } else { + UserDefaults.standard.set(trimmedClientId, forKey: key) + } + } + + static func loadGatewaySelectedAgentId(stableID: String) -> String? { + let trimmedID = stableID.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedID.isEmpty else { return nil } + let key = self.selectedAgentDefaultsPrefix + trimmedID + let value = UserDefaults.standard.string(forKey: key)? + .trimmingCharacters(in: .whitespacesAndNewlines) + if value?.isEmpty == false { return value } + return nil + } + + static func saveGatewaySelectedAgentId(stableID: String, agentId: String?) { + let trimmedID = stableID.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedID.isEmpty else { return } + let key = self.selectedAgentDefaultsPrefix + trimmedID + let trimmedAgentId = agentId?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + if trimmedAgentId.isEmpty { + UserDefaults.standard.removeObject(forKey: key) + } else { + UserDefaults.standard.set(trimmedAgentId, forKey: key) + } + } + private static func gatewayTokenAccount(instanceId: String) -> String { "gateway-token.\(instanceId)" } @@ -175,3 +247,101 @@ enum GatewaySettingsStore { } } + +enum GatewayDiagnostics { + private static let logger = Logger(subsystem: "ai.openclaw.ios", category: "GatewayDiag") + private static let queue = DispatchQueue(label: "ai.openclaw.gateway.diagnostics") + private static let maxLogBytes: Int64 = 512 * 1024 + private static let keepLogBytes: Int64 = 256 * 1024 + private static let logSizeCheckEveryWrites = 50 + nonisolated(unsafe) private static var logWritesSinceCheck = 0 + private static var fileURL: URL? 
{ + FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first? + .appendingPathComponent("openclaw-gateway.log") + } + + private static func truncateLogIfNeeded(url: URL) { + guard let attrs = try? FileManager.default.attributesOfItem(atPath: url.path), + let sizeNumber = attrs[.size] as? NSNumber + else { return } + let size = sizeNumber.int64Value + guard size > self.maxLogBytes else { return } + + do { + let handle = try FileHandle(forReadingFrom: url) + defer { try? handle.close() } + + let start = max(Int64(0), size - self.keepLogBytes) + try handle.seek(toOffset: UInt64(start)) + var tail = try handle.readToEnd() ?? Data() + + // If we truncated mid-line, drop the first partial line so logs remain readable. + if start > 0, let nl = tail.firstIndex(of: 10) { + let next = tail.index(after: nl) + if next < tail.endIndex { + tail = tail.suffix(from: next) + } else { + tail = Data() + } + } + + try tail.write(to: url, options: .atomic) + } catch { + // Best-effort only. + } + } + + private static func appendToLog(url: URL, data: Data) { + if FileManager.default.fileExists(atPath: url.path) { + if let handle = try? FileHandle(forWritingTo: url) { + defer { try? handle.close() } + _ = try? handle.seekToEnd() + try? handle.write(contentsOf: data) + } + } else { + try? 
data.write(to: url, options: .atomic) + } + } + + static func bootstrap() { + guard let url = fileURL else { return } + queue.async { + self.truncateLogIfNeeded(url: url) + let formatter = ISO8601DateFormatter() + formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + let timestamp = formatter.string(from: Date()) + let line = "[\(timestamp)] gateway diagnostics started\n" + if let data = line.data(using: .utf8) { + self.appendToLog(url: url, data: data) + } + } + } + + static func log(_ message: String) { + let formatter = ISO8601DateFormatter() + formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + let timestamp = formatter.string(from: Date()) + let line = "[\(timestamp)] \(message)" + logger.info("\(line, privacy: .public)") + + guard let url = fileURL else { return } + queue.async { + self.logWritesSinceCheck += 1 + if self.logWritesSinceCheck >= self.logSizeCheckEveryWrites { + self.logWritesSinceCheck = 0 + self.truncateLogIfNeeded(url: url) + } + let entry = line + "\n" + if let data = entry.data(using: .utf8) { + self.appendToLog(url: url, data: data) + } + } + } + + static func reset() { + guard let url = fileURL else { return } + queue.async { + try? FileManager.default.removeItem(at: url) + } + } +} diff --git a/apps/ios/Sources/Media/PhotoLibraryService.swift b/apps/ios/Sources/Media/PhotoLibraryService.swift new file mode 100644 index 0000000000..f66beb3e70 --- /dev/null +++ b/apps/ios/Sources/Media/PhotoLibraryService.swift @@ -0,0 +1,164 @@ +import Foundation +import Photos +import OpenClawKit +import UIKit + +final class PhotoLibraryService: PhotosServicing { + // The gateway WebSocket has a max payload size; returning large base64 blobs + // can cause the gateway to close the connection. Keep photo payloads small + // enough to safely fit in a single RPC frame. + // + // This is a transport constraint (not a security policy). 
If callers need + // full-resolution media, we should switch to an HTTP media handle flow. + private static let maxTotalBase64Chars = 340 * 1024 + private static let maxPerPhotoBase64Chars = 300 * 1024 + + func latest(params: OpenClawPhotosLatestParams) async throws -> OpenClawPhotosLatestPayload { + let status = await Self.ensureAuthorization() + guard status == .authorized || status == .limited else { + throw NSError(domain: "Photos", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "PHOTOS_PERMISSION_REQUIRED: grant Photos permission", + ]) + } + + let limit = max(1, min(params.limit ?? 1, 20)) + let fetchOptions = PHFetchOptions() + fetchOptions.fetchLimit = limit + fetchOptions.sortDescriptors = [NSSortDescriptor(key: "creationDate", ascending: false)] + let assets = PHAsset.fetchAssets(with: .image, options: fetchOptions) + + var results: [OpenClawPhotoPayload] = [] + var remainingBudget = Self.maxTotalBase64Chars + let maxWidth = params.maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600 + let quality = params.quality.map { max(0.1, min(1.0, $0)) } ?? 0.85 + let formatter = ISO8601DateFormatter() + + assets.enumerateObjects { asset, _, stop in + if results.count >= limit { stop.pointee = true; return } + if let payload = try? Self.renderAsset( + asset, + maxWidth: maxWidth, + quality: quality, + formatter: formatter) + { + // Keep the entire response under the gateway WS max payload. + if payload.base64.count > remainingBudget { + stop.pointee = true + return + } + remainingBudget -= payload.base64.count + results.append(payload) + } + } + + return OpenClawPhotosLatestPayload(photos: results) + } + + private static func ensureAuthorization() async -> PHAuthorizationStatus { + // Don’t prompt during node.invoke; prompts block the invoke and lead to timeouts. 
+ PHPhotoLibrary.authorizationStatus(for: .readWrite) + } + + private static func renderAsset( + _ asset: PHAsset, + maxWidth: Int, + quality: Double, + formatter: ISO8601DateFormatter) throws -> OpenClawPhotoPayload + { + let manager = PHImageManager.default() + let options = PHImageRequestOptions() + options.isSynchronous = true + options.isNetworkAccessAllowed = true + options.deliveryMode = .highQualityFormat + + let targetSize: CGSize = { + guard maxWidth > 0 else { return PHImageManagerMaximumSize } + let aspect = CGFloat(asset.pixelHeight) / CGFloat(max(1, asset.pixelWidth)) + let width = CGFloat(maxWidth) + return CGSize(width: width, height: width * aspect) + }() + + var image: UIImage? + manager.requestImage( + for: asset, + targetSize: targetSize, + contentMode: .aspectFit, + options: options) + { result, _ in + image = result + } + + guard let image else { + throw NSError(domain: "Photos", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "photo load failed", + ]) + } + + let (data, finalImage) = try encodeJpegUnderBudget( + image: image, + quality: quality, + maxBase64Chars: maxPerPhotoBase64Chars) + + let created = asset.creationDate.map { formatter.string(from: $0) } + return OpenClawPhotoPayload( + format: "jpeg", + base64: data.base64EncodedString(), + width: Int(finalImage.size.width), + height: Int(finalImage.size.height), + createdAt: created) + } + + private static func encodeJpegUnderBudget( + image: UIImage, + quality: Double, + maxBase64Chars: Int) throws -> (Data, UIImage) + { + var currentImage = image + var currentQuality = max(0.1, min(1.0, quality)) + + // Try lowering JPEG quality first, then downscale if needed. 
+ for _ in 0..<10 { + guard let data = currentImage.jpegData(compressionQuality: currentQuality) else { + throw NSError(domain: "Photos", code: 3, userInfo: [ + NSLocalizedDescriptionKey: "photo encode failed", + ]) + } + + let base64Len = ((data.count + 2) / 3) * 4 + if base64Len <= maxBase64Chars { + return (data, currentImage) + } + + if currentQuality > 0.35 { + currentQuality = max(0.25, currentQuality - 0.15) + continue + } + + // Downscale by ~25% each step once quality is low. + let newWidth = max(240, currentImage.size.width * 0.75) + if newWidth >= currentImage.size.width { + break + } + currentImage = resize(image: currentImage, targetWidth: newWidth) + } + + throw NSError(domain: "Photos", code: 4, userInfo: [ + NSLocalizedDescriptionKey: "photo too large for gateway transport; try smaller maxWidth/quality", + ]) + } + + private static func resize(image: UIImage, targetWidth: CGFloat) -> UIImage { + let size = image.size + if size.width <= 0 || size.height <= 0 || targetWidth <= 0 { + return image + } + let scale = targetWidth / size.width + let targetSize = CGSize(width: targetWidth, height: max(1, size.height * scale)) + let format = UIGraphicsImageRendererFormat.default() + format.scale = 1 + let renderer = UIGraphicsImageRenderer(size: targetSize, format: format) + return renderer.image { _ in + image.draw(in: CGRect(origin: .zero, size: targetSize)) + } + } +} diff --git a/apps/ios/Sources/Model/NodeAppModel+Canvas.swift b/apps/ios/Sources/Model/NodeAppModel+Canvas.swift new file mode 100644 index 0000000000..372f8361d3 --- /dev/null +++ b/apps/ios/Sources/Model/NodeAppModel+Canvas.swift @@ -0,0 +1,97 @@ +import Foundation +import Network +import os + +extension NodeAppModel { + func _test_resolveA2UIHostURL() async -> String? { + await self.resolveA2UIHostURL() + } + + func resolveA2UIHostURL() async -> String? 
{ + guard let raw = await self.gatewaySession.currentCanvasHostUrl() else { return nil } + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty, let base = URL(string: trimmed) else { return nil } + if let host = base.host, Self.isLoopbackHost(host) { + return nil + } + return base.appendingPathComponent("__openclaw__/a2ui/").absoluteString + "?platform=ios" + } + + private static func isLoopbackHost(_ host: String) -> Bool { + let normalized = host.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if normalized.isEmpty { return true } + if normalized == "localhost" || normalized == "::1" || normalized == "0.0.0.0" { + return true + } + if normalized == "127.0.0.1" || normalized.hasPrefix("127.") { + return true + } + return false + } + + func showA2UIOnConnectIfNeeded() async { + guard let a2uiUrl = await self.resolveA2UIHostURL() else { + await MainActor.run { + self.lastAutoA2uiURL = nil + self.screen.showDefaultCanvas() + } + return + } + let current = self.screen.urlString.trimmingCharacters(in: .whitespacesAndNewlines) + if current.isEmpty || current == self.lastAutoA2uiURL { + // Avoid navigating the WKWebView to an unreachable host: it leaves a persistent + // "could not connect to the server" overlay even when the gateway is connected. + if let url = URL(string: a2uiUrl), + await Self.probeTCP(url: url, timeoutSeconds: 2.5) + { + self.screen.navigate(to: a2uiUrl) + self.lastAutoA2uiURL = a2uiUrl + } else { + self.lastAutoA2uiURL = nil + self.screen.showDefaultCanvas() + } + } + } + + func showLocalCanvasOnDisconnect() { + self.lastAutoA2uiURL = nil + self.screen.showDefaultCanvas() + } + + private static func probeTCP(url: URL, timeoutSeconds: Double) async -> Bool { + guard let host = url.host, !host.isEmpty else { return false } + let portInt = url.port ?? ((url.scheme ?? "").lowercased() == "wss" ? 
443 : 80) + guard portInt >= 1, portInt <= 65535 else { return false } + guard let nwPort = NWEndpoint.Port(rawValue: UInt16(portInt)) else { return false } + + let endpointHost = NWEndpoint.Host(host) + let connection = NWConnection(host: endpointHost, port: nwPort, using: .tcp) + return await withCheckedContinuation { cont in + let queue = DispatchQueue(label: "a2ui.preflight") + let finished = OSAllocatedUnfairLock(initialState: false) + let finish: @Sendable (Bool) -> Void = { ok in + let shouldResume = finished.withLock { flag -> Bool in + if flag { return false } + flag = true + return true + } + guard shouldResume else { return } + connection.cancel() + cont.resume(returning: ok) + } + + connection.stateUpdateHandler = { state in + switch state { + case .ready: + finish(true) + case .failed, .cancelled: + finish(false) + default: + break + } + } + connection.start(queue: queue) + queue.asyncAfter(deadline: .now() + timeoutSeconds) { finish(false) } + } + } +} diff --git a/apps/ios/Sources/Model/NodeAppModel.swift b/apps/ios/Sources/Model/NodeAppModel.swift index 963318a8a2..d41a619aa2 100644 --- a/apps/ios/Sources/Model/NodeAppModel.swift +++ b/apps/ios/Sources/Model/NodeAppModel.swift @@ -1,8 +1,42 @@ +import OpenClawChatUI import OpenClawKit -import Network +import OpenClawProtocol import Observation import SwiftUI import UIKit +import UserNotifications + +// Wrap errors without pulling non-Sendable types into async notification paths. +private struct NotificationCallError: Error, Sendable { + let message: String +} + +// Ensures notification requests return promptly even if the system prompt blocks. +private final class NotificationInvokeLatch: @unchecked Sendable { + private let lock = NSLock() + private var continuation: CheckedContinuation, Never>? 
+ private var resumed = false + + func setContinuation(_ continuation: CheckedContinuation, Never>) { + self.lock.lock() + defer { self.lock.unlock() } + self.continuation = continuation + } + + func resume(_ response: Result) { + let cont: CheckedContinuation, Never>? + self.lock.lock() + if self.resumed { + self.lock.unlock() + return + } + self.resumed = true + cont = self.continuation + self.continuation = nil + self.lock.unlock() + cont?.resume(returning: response) + } +} @MainActor @Observable @@ -15,34 +49,108 @@ final class NodeAppModel { } var isBackgrounded: Bool = false - let screen = ScreenController() - let camera = CameraController() - private let screenRecorder = ScreenRecordService() + let screen: ScreenController + private let camera: any CameraServicing + private let screenRecorder: any ScreenRecordingServicing var gatewayStatusText: String = "Offline" var gatewayServerName: String? var gatewayRemoteAddress: String? var connectedGatewayID: String? + var gatewayAutoReconnectEnabled: Bool = true var seamColorHex: String? - var mainSessionKey: String = "main" + private var mainSessionBaseKey: String = "main" + var selectedAgentId: String? + var gatewayDefaultAgentId: String? + var gatewayAgents: [AgentSummary] = [] - private let gateway = GatewayNodeSession() - private var gatewayTask: Task? + var mainSessionKey: String { + let base = SessionKey.normalizeMainKey(self.mainSessionBaseKey) + let agentId = (self.selectedAgentId ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + let defaultId = (self.gatewayDefaultAgentId ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if agentId.isEmpty || (!defaultId.isEmpty && agentId == defaultId) { return base } + return SessionKey.makeAgentSessionKey(agentId: agentId, baseKey: base) + } + + var activeAgentName: String { + let agentId = (self.selectedAgentId ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + let defaultId = (self.gatewayDefaultAgentId ?? 
"").trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedId = agentId.isEmpty ? defaultId : agentId + if resolvedId.isEmpty { return "Main" } + if let match = self.gatewayAgents.first(where: { $0.id == resolvedId }) { + let name = (match.name ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + return name.isEmpty ? match.id : name + } + return resolvedId + } + + // Primary "node" connection: used for device capabilities and node.invoke requests. + private let nodeGateway = GatewayNodeSession() + // Secondary "operator" connection: used for chat/talk/config/voicewake requests. + private let operatorGateway = GatewayNodeSession() + private var nodeGatewayTask: Task? + private var operatorGatewayTask: Task? private var voiceWakeSyncTask: Task? @ObservationIgnored private var cameraHUDDismissTask: Task? + @ObservationIgnored private lazy var capabilityRouter: NodeCapabilityRouter = self.buildCapabilityRouter() + private let gatewayHealthMonitor = GatewayHealthMonitor() + private var gatewayHealthMonitorDisabled = false + private let notificationCenter: NotificationCentering let voiceWake = VoiceWakeManager() - let talkMode = TalkModeManager() - private let locationService = LocationService() - private var lastAutoA2uiURL: String? + let talkMode: TalkModeManager + private let locationService: any LocationServicing + private let deviceStatusService: any DeviceStatusServicing + private let photosService: any PhotosServicing + private let contactsService: any ContactsServicing + private let calendarService: any CalendarServicing + private let remindersService: any RemindersServicing + private let motionService: any MotionServicing + var lastAutoA2uiURL: String? + private var pttVoiceWakeSuspended = false + private var talkVoiceWakeSuspended = false + private var backgroundVoiceWakeSuspended = false + private var backgroundTalkSuspended = false + private var backgroundedAt: Date? 
+ private var reconnectAfterBackgroundArmed = false private var gatewayConnected = false - var gatewaySession: GatewayNodeSession { self.gateway } + private var operatorConnected = false + var gatewaySession: GatewayNodeSession { self.nodeGateway } + var operatorSession: GatewayNodeSession { self.operatorGateway } + private(set) var activeGatewayConnectConfig: GatewayConnectConfig? var cameraHUDText: String? var cameraHUDKind: CameraHUDKind? var cameraFlashNonce: Int = 0 var screenRecordActive: Bool = false - init() { + init( + screen: ScreenController = ScreenController(), + camera: any CameraServicing = CameraController(), + screenRecorder: any ScreenRecordingServicing = ScreenRecordService(), + locationService: any LocationServicing = LocationService(), + notificationCenter: NotificationCentering = LiveNotificationCenter(), + deviceStatusService: any DeviceStatusServicing = DeviceStatusService(), + photosService: any PhotosServicing = PhotoLibraryService(), + contactsService: any ContactsServicing = ContactsService(), + calendarService: any CalendarServicing = CalendarService(), + remindersService: any RemindersServicing = RemindersService(), + motionService: any MotionServicing = MotionService(), + talkMode: TalkModeManager = TalkModeManager()) + { + self.screen = screen + self.camera = camera + self.screenRecorder = screenRecorder + self.locationService = locationService + self.notificationCenter = notificationCenter + self.deviceStatusService = deviceStatusService + self.photosService = photosService + self.contactsService = contactsService + self.calendarService = calendarService + self.remindersService = remindersService + self.motionService = motionService + self.talkMode = talkMode + GatewayDiagnostics.bootstrap() + self.voiceWake.configure { [weak self] cmd in guard let self else { return } let sessionKey = await MainActor.run { self.mainSessionKey } @@ -55,9 +163,10 @@ final class NodeAppModel { let enabled = UserDefaults.standard.bool(forKey: 
"voiceWake.enabled") self.voiceWake.setEnabled(enabled) - self.talkMode.attachGateway(self.gateway) + self.talkMode.attachGateway(self.operatorGateway) let talkEnabled = UserDefaults.standard.bool(forKey: "talk.enabled") - self.talkMode.setEnabled(talkEnabled) + // Route through the coordinator so VoiceWake and Talk don't fight over the microphone. + self.setTalkEnabled(talkEnabled) // Wire up deep links from canvas taps self.screen.onDeepLink = { [weak self] url in @@ -107,7 +216,10 @@ final class NodeAppModel { return raw.isEmpty ? "-" : raw }() - let host = UserDefaults.standard.string(forKey: "node.displayName") ?? UIDevice.current.name + let host = NodeDisplayName.resolve( + existing: UserDefaults.standard.string(forKey: "node.displayName"), + deviceName: UIDevice.current.name, + interfaceIdiom: UIDevice.current.userInterfaceIdiom) let instanceId = (UserDefaults.standard.string(forKey: "node.instanceId") ?? "ios-node").lowercased() let contextJSON = OpenClawCanvasA2UIAction.compactJSON(userAction["context"]) let sessionKey = self.mainSessionKey @@ -150,33 +262,64 @@ final class NodeAppModel { } } - private func resolveA2UIHostURL() async -> String? 
{ - guard let raw = await self.gateway.currentCanvasHostUrl() else { return nil } - let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty, let base = URL(string: trimmed) else { return nil } - return base.appendingPathComponent("__openclaw__/a2ui/").absoluteString + "?platform=ios" - } - - private func showA2UIOnConnectIfNeeded() async { - guard let a2uiUrl = await self.resolveA2UIHostURL() else { return } - let current = self.screen.urlString.trimmingCharacters(in: .whitespacesAndNewlines) - if current.isEmpty || current == self.lastAutoA2uiURL { - self.screen.navigate(to: a2uiUrl) - self.lastAutoA2uiURL = a2uiUrl - } - } - - private func showLocalCanvasOnDisconnect() { - self.lastAutoA2uiURL = nil - self.screen.showDefaultCanvas() - } func setScenePhase(_ phase: ScenePhase) { switch phase { case .background: self.isBackgrounded = true + self.stopGatewayHealthMonitor() + self.backgroundedAt = Date() + self.reconnectAfterBackgroundArmed = true + // Be conservative: release the mic when the app backgrounds. + self.backgroundVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture() + self.backgroundTalkSuspended = self.talkMode.suspendForBackground() case .active, .inactive: self.isBackgrounded = false + if self.operatorConnected { + self.startGatewayHealthMonitor() + } + if phase == .active { + self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: self.backgroundVoiceWakeSuspended) + self.backgroundVoiceWakeSuspended = false + Task { [weak self] in + guard let self else { return } + let suspended = await MainActor.run { self.backgroundTalkSuspended } + await MainActor.run { self.backgroundTalkSuspended = false } + await self.talkMode.resumeAfterBackground(wasSuspended: suspended) + } + } + if phase == .active, self.reconnectAfterBackgroundArmed { + self.reconnectAfterBackgroundArmed = false + let backgroundedFor = self.backgroundedAt.map { Date().timeIntervalSince($0) } ?? 
0 + self.backgroundedAt = nil + // iOS may suspend network sockets in background without a clean close. + // On foreground, force a fresh handshake to avoid "connected but dead" states. + if backgroundedFor >= 3.0 { + Task { [weak self] in + guard let self else { return } + let operatorWasConnected = await MainActor.run { self.operatorConnected } + if operatorWasConnected { + // Prefer keeping the connection if it's healthy; reconnect only when needed. + let healthy = (try? await self.operatorGateway.request( + method: "health", + paramsJSON: nil, + timeoutSeconds: 2)) != nil + if healthy { + await MainActor.run { self.startGatewayHealthMonitor() } + return + } + } + + await self.operatorGateway.disconnect() + await self.nodeGateway.disconnect() + await MainActor.run { + self.operatorConnected = false + self.gatewayConnected = false + self.talkMode.updateGatewayConnected(false) + } + } + } + } @unknown default: self.isBackgrounded = false } @@ -184,9 +327,29 @@ final class NodeAppModel { func setVoiceWakeEnabled(_ enabled: Bool) { self.voiceWake.setEnabled(enabled) + if enabled { + // If talk is enabled, voice wake should not grab the mic. + if self.talkMode.isEnabled { + self.voiceWake.setSuppressedByTalk(true) + self.talkVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture() + } + } else { + self.voiceWake.setSuppressedByTalk(false) + self.talkVoiceWakeSuspended = false + } } func setTalkEnabled(_ enabled: Bool) { + if enabled { + // Voice wake holds the microphone continuously; talk mode needs exclusive access for STT. + // When talk is enabled from the UI, prioritize talk and pause voice wake. 
+ self.voiceWake.setSuppressedByTalk(true) + self.talkVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture() + } else { + self.voiceWake.setSuppressedByTalk(false) + self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: self.talkVoiceWakeSuspended) + self.talkVoiceWakeSuspended = false + } self.talkMode.setEnabled(enabled) } @@ -203,143 +366,13 @@ final class NodeAppModel { } } - func connectToGateway( - url: URL, - gatewayStableID: String, - tls: GatewayTLSParams?, - token: String?, - password: String?, - connectOptions: GatewayConnectOptions) - { - self.gatewayTask?.cancel() - self.gatewayServerName = nil - self.gatewayRemoteAddress = nil - let id = gatewayStableID.trimmingCharacters(in: .whitespacesAndNewlines) - self.connectedGatewayID = id.isEmpty ? url.absoluteString : id - self.gatewayConnected = false - self.voiceWakeSyncTask?.cancel() - self.voiceWakeSyncTask = nil - let sessionBox = tls.map { WebSocketSessionBox(session: GatewayTLSPinningSession(params: $0)) } - - self.gatewayTask = Task { - var attempt = 0 - while !Task.isCancelled { - await MainActor.run { - if attempt == 0 { - self.gatewayStatusText = "Connecting…" - } else { - self.gatewayStatusText = "Reconnecting…" - } - self.gatewayServerName = nil - self.gatewayRemoteAddress = nil - } - - do { - try await self.gateway.connect( - url: url, - token: token, - password: password, - connectOptions: connectOptions, - sessionBox: sessionBox, - onConnected: { [weak self] in - guard let self else { return } - await MainActor.run { - self.gatewayStatusText = "Connected" - self.gatewayServerName = url.host ?? 
"gateway" - self.gatewayConnected = true - } - if let addr = await self.gateway.currentRemoteAddress() { - await MainActor.run { - self.gatewayRemoteAddress = addr - } - } - await self.refreshBrandingFromGateway() - await self.startVoiceWakeSync() - await self.showA2UIOnConnectIfNeeded() - }, - onDisconnected: { [weak self] reason in - guard let self else { return } - await MainActor.run { - self.gatewayStatusText = "Disconnected" - self.gatewayRemoteAddress = nil - self.gatewayConnected = false - self.showLocalCanvasOnDisconnect() - self.gatewayStatusText = "Disconnected: \(reason)" - } - }, - onInvoke: { [weak self] req in - guard let self else { - return BridgeInvokeResponse( - id: req.id, - ok: false, - error: OpenClawNodeError( - code: .unavailable, - message: "UNAVAILABLE: node not ready")) - } - return await self.handleInvoke(req) - }) - - if Task.isCancelled { break } - attempt = 0 - try? await Task.sleep(nanoseconds: 1_000_000_000) - } catch { - if Task.isCancelled { break } - attempt += 1 - await MainActor.run { - self.gatewayStatusText = "Gateway error: \(error.localizedDescription)" - self.gatewayServerName = nil - self.gatewayRemoteAddress = nil - self.gatewayConnected = false - self.showLocalCanvasOnDisconnect() - } - let sleepSeconds = min(8.0, 0.5 * pow(1.7, Double(attempt))) - try? 
await Task.sleep(nanoseconds: UInt64(sleepSeconds * 1_000_000_000)) - } - } - - await MainActor.run { - self.gatewayStatusText = "Offline" - self.gatewayServerName = nil - self.gatewayRemoteAddress = nil - self.connectedGatewayID = nil - self.gatewayConnected = false - self.seamColorHex = nil - if !SessionKey.isCanonicalMainSessionKey(self.mainSessionKey) { - self.mainSessionKey = "main" - self.talkMode.updateMainSessionKey(self.mainSessionKey) - } - self.showLocalCanvasOnDisconnect() - } - } - } - - func disconnectGateway() { - self.gatewayTask?.cancel() - self.gatewayTask = nil - self.voiceWakeSyncTask?.cancel() - self.voiceWakeSyncTask = nil - Task { await self.gateway.disconnect() } - self.gatewayStatusText = "Offline" - self.gatewayServerName = nil - self.gatewayRemoteAddress = nil - self.connectedGatewayID = nil - self.gatewayConnected = false - self.seamColorHex = nil - if !SessionKey.isCanonicalMainSessionKey(self.mainSessionKey) { - self.mainSessionKey = "main" - self.talkMode.updateMainSessionKey(self.mainSessionKey) - } - self.showLocalCanvasOnDisconnect() - } - private func applyMainSessionKey(_ key: String?) { let trimmed = (key ?? 
"").trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { return } - let current = self.mainSessionKey.trimmingCharacters(in: .whitespacesAndNewlines) - if SessionKey.isCanonicalMainSessionKey(current) { return } + let current = self.mainSessionBaseKey.trimmingCharacters(in: .whitespacesAndNewlines) if trimmed == current { return } - self.mainSessionKey = trimmed - self.talkMode.updateMainSessionKey(trimmed) + self.mainSessionBaseKey = trimmed + self.talkMode.updateMainSessionKey(self.mainSessionKey) } var seamColor: Color { @@ -361,7 +394,7 @@ final class NodeAppModel { private func refreshBrandingFromGateway() async { do { - let res = try await self.gateway.request(method: "config.get", paramsJSON: "{}", timeoutSeconds: 8) + let res = try await self.operatorGateway.request(method: "config.get", paramsJSON: "{}", timeoutSeconds: 8) guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return } guard let config = json["config"] as? [String: Any] else { return } let ui = config["ui"] as? [String: Any] @@ -370,16 +403,52 @@ final class NodeAppModel { let mainKey = SessionKey.normalizeMainKey(session?["mainKey"] as? String) await MainActor.run { self.seamColorHex = raw.isEmpty ? nil : raw - if !SessionKey.isCanonicalMainSessionKey(self.mainSessionKey) { - self.mainSessionKey = mainKey - self.talkMode.updateMainSessionKey(mainKey) - } + self.mainSessionBaseKey = mainKey + self.talkMode.updateMainSessionKey(self.mainSessionKey) } } catch { + if let gatewayError = error as? 
GatewayResponseError { + let lower = gatewayError.message.lowercased() + if lower.contains("unauthorized role") { + return + } + } // ignore } } + private func refreshAgentsFromGateway() async { + do { + let res = try await self.operatorGateway.request(method: "agents.list", paramsJSON: "{}", timeoutSeconds: 8) + let decoded = try JSONDecoder().decode(AgentsListResult.self, from: res) + await MainActor.run { + self.gatewayDefaultAgentId = decoded.defaultid + self.gatewayAgents = decoded.agents + self.applyMainSessionKey(decoded.mainkey) + + let selected = (self.selectedAgentId ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if !selected.isEmpty && !decoded.agents.contains(where: { $0.id == selected }) { + self.selectedAgentId = nil + } + self.talkMode.updateMainSessionKey(self.mainSessionKey) + } + } catch { + // Best-effort only. + } + } + + func setSelectedAgentId(_ agentId: String?) { + let trimmed = (agentId ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + let stableID = (self.connectedGatewayID ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if stableID.isEmpty { + self.selectedAgentId = trimmed.isEmpty ? nil : trimmed + } else { + self.selectedAgentId = trimmed.isEmpty ? nil : trimmed + GatewaySettingsStore.saveGatewaySelectedAgentId(stableID: stableID, agentId: self.selectedAgentId) + } + self.talkMode.updateMainSessionKey(self.mainSessionKey) + } + func setGlobalWakeWords(_ words: [String]) async { let sanitized = VoiceWakePreferences.sanitizeTriggerWords(words) @@ -392,7 +461,7 @@ final class NodeAppModel { else { return } do { - _ = try await self.gateway.request(method: "voicewake.set", paramsJSON: json, timeoutSeconds: 12) + _ = try await self.operatorGateway.request(method: "voicewake.set", paramsJSON: json, timeoutSeconds: 12) } catch { // Best-effort only. 
} @@ -403,9 +472,11 @@ final class NodeAppModel { self.voiceWakeSyncTask = Task { [weak self] in guard let self else { return } - await self.refreshWakeWordsFromGateway() + if !(await self.isGatewayHealthMonitorDisabled()) { + await self.refreshWakeWordsFromGateway() + } - let stream = await self.gateway.subscribeServerEvents(bufferingNewest: 200) + let stream = await self.operatorGateway.subscribeServerEvents(bufferingNewest: 200) for await evt in stream { if Task.isCancelled { return } guard evt.event == "voicewake.changed" else { continue } @@ -418,16 +489,68 @@ final class NodeAppModel { } } + private func startGatewayHealthMonitor() { + self.gatewayHealthMonitorDisabled = false + self.gatewayHealthMonitor.start( + check: { [weak self] in + guard let self else { return false } + if await self.isGatewayHealthMonitorDisabled() { return true } + do { + let data = try await self.operatorGateway.request(method: "health", paramsJSON: nil, timeoutSeconds: 6) + guard let decoded = try? JSONDecoder().decode(OpenClawGatewayHealthOK.self, from: data) else { + return false + } + return decoded.ok ?? false + } catch { + if let gatewayError = error as? 
GatewayResponseError { + let lower = gatewayError.message.lowercased() + if lower.contains("unauthorized role") { + await self.setGatewayHealthMonitorDisabled(true) + return true + } + } + return false + } + }, + onFailure: { [weak self] _ in + guard let self else { return } + await self.operatorGateway.disconnect() + await MainActor.run { + self.operatorConnected = false + self.talkMode.updateGatewayConnected(false) + } + }) + } + + private func stopGatewayHealthMonitor() { + self.gatewayHealthMonitor.stop() + } + private func refreshWakeWordsFromGateway() async { do { - let data = try await self.gateway.request(method: "voicewake.get", paramsJSON: "{}", timeoutSeconds: 8) + let data = try await self.operatorGateway.request(method: "voicewake.get", paramsJSON: "{}", timeoutSeconds: 8) guard let triggers = VoiceWakePreferences.decodeGatewayTriggers(from: data) else { return } VoiceWakePreferences.saveTriggerWords(triggers) } catch { + if let gatewayError = error as? GatewayResponseError { + let lower = gatewayError.message.lowercased() + if lower.contains("unauthorized role") { + await self.setGatewayHealthMonitorDisabled(true) + return + } + } // Best-effort only. } } + private func isGatewayHealthMonitorDisabled() -> Bool { + self.gatewayHealthMonitorDisabled + } + + private func setGatewayHealthMonitorDisabled(_ disabled: Bool) { + self.gatewayHealthMonitorDisabled = disabled + } + func sendVoiceTranscript(text: String, sessionKey: String?) 
async throws { if await !self.isGatewayConnected() { throw NSError(domain: "Gateway", code: 10, userInfo: [ @@ -445,7 +568,7 @@ final class NodeAppModel { NSLocalizedDescriptionKey: "Failed to encode voice transcript payload as UTF-8", ]) } - await self.gateway.sendEvent(event: "voice.transcript", payloadJSON: json) + await self.nodeGateway.sendEvent(event: "voice.transcript", payloadJSON: json) } func handleDeepLink(url: URL) async { @@ -494,7 +617,7 @@ final class NodeAppModel { NSLocalizedDescriptionKey: "Failed to encode agent request payload as UTF-8", ]) } - await self.gateway.sendEvent(event: "agent.request", payloadJSON: json) + await self.nodeGateway.sendEvent(event: "agent.request", payloadJSON: json) } private func isGatewayConnected() async -> Bool { @@ -523,30 +646,19 @@ final class NodeAppModel { } do { - switch command { - case OpenClawLocationCommand.get.rawValue: - return try await self.handleLocationInvoke(req) - case OpenClawCanvasCommand.present.rawValue, - OpenClawCanvasCommand.hide.rawValue, - OpenClawCanvasCommand.navigate.rawValue, - OpenClawCanvasCommand.evalJS.rawValue, - OpenClawCanvasCommand.snapshot.rawValue: - return try await self.handleCanvasInvoke(req) - case OpenClawCanvasA2UICommand.reset.rawValue, - OpenClawCanvasA2UICommand.push.rawValue, - OpenClawCanvasA2UICommand.pushJSONL.rawValue: - return try await self.handleCanvasA2UIInvoke(req) - case OpenClawCameraCommand.list.rawValue, - OpenClawCameraCommand.snap.rawValue, - OpenClawCameraCommand.clip.rawValue: - return try await self.handleCameraInvoke(req) - case OpenClawScreenCommand.record.rawValue: - return try await self.handleScreenRecordInvoke(req) - default: + return try await self.capabilityRouter.handle(req) + } catch let error as NodeCapabilityRouter.RouterError { + switch error { + case .unknownCommand: return BridgeInvokeResponse( id: req.id, ok: false, error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + case 
.handlerUnavailable: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .unavailable, message: "node handler unavailable")) } } catch { if command.hasPrefix("camera.") { @@ -561,7 +673,8 @@ final class NodeAppModel { } private func isBackgroundRestricted(_ command: String) -> Bool { - command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.") + command.hasPrefix("canvas.") || command.hasPrefix("camera.") || command.hasPrefix("screen.") || + command.hasPrefix("talk.") } private func handleLocationInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { @@ -626,6 +739,7 @@ final class NodeAppModel { private func handleCanvasInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { switch req.command { case OpenClawCanvasCommand.present.rawValue: + // iOS ignores placement hints; canvas always fills the screen. let params = (try? Self.decodeParams(OpenClawCanvasPresentParams.self, from: req.paramsJSON)) ?? OpenClawCanvasPresentParams() let url = params.url?.trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" @@ -636,6 +750,7 @@ final class NodeAppModel { } return BridgeInvokeResponse(id: req.id, ok: true) case OpenClawCanvasCommand.hide.rawValue: + self.screen.showDefaultCanvas() return BridgeInvokeResponse(id: req.id, ok: true) case OpenClawCanvasCommand.navigate.rawValue: let params = try Self.decodeParams(OpenClawCanvasNavigateParams.self, from: req.paramsJSON) @@ -706,7 +821,7 @@ final class NodeAppModel { """) return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) case OpenClawCanvasA2UICommand.push.rawValue, OpenClawCanvasA2UICommand.pushJSONL.rawValue: - let messages: [AnyCodable] + let messages: [OpenClawKit.AnyCodable] if command == OpenClawCanvasA2UICommand.pushJSONL.rawValue { let params = try Self.decodeParams(OpenClawCanvasA2UIPushJSONLParams.self, from: req.paramsJSON) messages = try OpenClawCanvasA2UIJSONL.decodeMessagesFromJSONL(params.jsonl) @@ -859,9 +974,427 @@ final class NodeAppModel { return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) } + private func handleSystemNotify(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let params = try Self.decodeParams(OpenClawSystemNotifyParams.self, from: req.paramsJSON) + let title = params.title.trimmingCharacters(in: .whitespacesAndNewlines) + let body = params.body.trimmingCharacters(in: .whitespacesAndNewlines) + if title.isEmpty, body.isEmpty { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: empty notification")) + } + + let finalStatus = await self.requestNotificationAuthorizationIfNeeded() + guard finalStatus == .authorized || finalStatus == .provisional || finalStatus == .ephemeral else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .unavailable, message: "NOT_AUTHORIZED: notifications")) + } + + let addResult = await self.runNotificationCall(timeoutSeconds: 2.0) { [notificationCenter] in + let content = 
UNMutableNotificationContent() + content.title = title + content.body = body + if #available(iOS 15.0, *) { + switch params.priority ?? .active { + case .passive: + content.interruptionLevel = .passive + case .timeSensitive: + content.interruptionLevel = .timeSensitive + case .active: + content.interruptionLevel = .active + } + } + let soundValue = params.sound?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if let soundValue, ["none", "silent", "off", "false", "0"].contains(soundValue) { + content.sound = nil + } else { + content.sound = .default + } + let request = UNNotificationRequest( + identifier: UUID().uuidString, + content: content, + trigger: nil) + try await notificationCenter.add(request) + } + if case let .failure(error) = addResult { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .unavailable, message: "NOTIFICATION_FAILED: \(error.message)")) + } + return BridgeInvokeResponse(id: req.id, ok: true) + } + + private func handleChatPushInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let params = try Self.decodeParams(OpenClawChatPushParams.self, from: req.paramsJSON) + let text = params.text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !text.isEmpty else { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: empty chat.push text")) + } + + let finalStatus = await self.requestNotificationAuthorizationIfNeeded() + let messageId = UUID().uuidString + if finalStatus == .authorized || finalStatus == .provisional || finalStatus == .ephemeral { + let addResult = await self.runNotificationCall(timeoutSeconds: 2.0) { [notificationCenter] in + let content = UNMutableNotificationContent() + content.title = "OpenClaw" + content.body = text + content.sound = .default + content.userInfo = ["messageId": messageId] + let request = UNNotificationRequest( + identifier: messageId, + content: 
content, + trigger: nil) + try await notificationCenter.add(request) + } + if case let .failure(error) = addResult { + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .unavailable, message: "NOTIFICATION_FAILED: \(error.message)")) + } + } + + if params.speak ?? true { + let toSpeak = text + Task { @MainActor in + try? await TalkSystemSpeechSynthesizer.shared.speak(text: toSpeak) + } + } + + let payload = OpenClawChatPushPayload(messageId: messageId) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + } + + private func requestNotificationAuthorizationIfNeeded() async -> NotificationAuthorizationStatus { + let status = await self.notificationAuthorizationStatus() + guard status == .notDetermined else { return status } + + // Avoid hanging invoke requests if the permission prompt is never answered. + _ = await self.runNotificationCall(timeoutSeconds: 2.0) { [notificationCenter] in + _ = try await notificationCenter.requestAuthorization(options: [.alert, .sound, .badge]) + } + + return await self.notificationAuthorizationStatus() + } + + private func notificationAuthorizationStatus() async -> NotificationAuthorizationStatus { + let result = await self.runNotificationCall(timeoutSeconds: 1.5) { [notificationCenter] in + await notificationCenter.authorizationStatus() + } + switch result { + case let .success(status): + return status + case .failure: + return .denied + } + } + + private func runNotificationCall( + timeoutSeconds: Double, + operation: @escaping @Sendable () async throws -> T + ) async -> Result { + let latch = NotificationInvokeLatch() + var opTask: Task? + var timeoutTask: Task? 
+ defer { + opTask?.cancel() + timeoutTask?.cancel() + } + let clamped = max(0.0, timeoutSeconds) + return await withCheckedContinuation { (cont: CheckedContinuation, Never>) in + latch.setContinuation(cont) + opTask = Task { @MainActor in + do { + let value = try await operation() + latch.resume(.success(value)) + } catch { + latch.resume(.failure(NotificationCallError(message: error.localizedDescription))) + } + } + timeoutTask = Task.detached { + if clamped > 0 { + try? await Task.sleep(nanoseconds: UInt64(clamped * 1_000_000_000)) + } + latch.resume(.failure(NotificationCallError(message: "notification request timed out"))) + } + } + } + + private func handleDeviceInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case OpenClawDeviceCommand.status.rawValue: + let payload = try await self.deviceStatusService.status() + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case OpenClawDeviceCommand.info.rawValue: + let payload = self.deviceStatusService.info() + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handlePhotosInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + let params = (try? Self.decodeParams(OpenClawPhotosLatestParams.self, from: req.paramsJSON)) ?? + OpenClawPhotosLatestParams() + let payload = try await self.photosService.latest(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + } + + private func handleContactsInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case OpenClawContactsCommand.search.rawValue: + let params = (try? 
Self.decodeParams(OpenClawContactsSearchParams.self, from: req.paramsJSON)) ?? + OpenClawContactsSearchParams() + let payload = try await self.contactsService.search(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case OpenClawContactsCommand.add.rawValue: + let params = try Self.decodeParams(OpenClawContactsAddParams.self, from: req.paramsJSON) + let payload = try await self.contactsService.add(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleCalendarInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case OpenClawCalendarCommand.events.rawValue: + let params = (try? Self.decodeParams(OpenClawCalendarEventsParams.self, from: req.paramsJSON)) ?? + OpenClawCalendarEventsParams() + let payload = try await self.calendarService.events(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case OpenClawCalendarCommand.add.rawValue: + let params = try Self.decodeParams(OpenClawCalendarAddParams.self, from: req.paramsJSON) + let payload = try await self.calendarService.add(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleRemindersInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case OpenClawRemindersCommand.list.rawValue: + let params = (try? 
Self.decodeParams(OpenClawRemindersListParams.self, from: req.paramsJSON)) ?? + OpenClawRemindersListParams() + let payload = try await self.remindersService.list(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case OpenClawRemindersCommand.add.rawValue: + let params = try Self.decodeParams(OpenClawRemindersAddParams.self, from: req.paramsJSON) + let payload = try await self.remindersService.add(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + default: + return BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command")) + } + } + + private func handleMotionInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { + switch req.command { + case OpenClawMotionCommand.activity.rawValue: + let params = (try? Self.decodeParams(OpenClawMotionActivityParams.self, from: req.paramsJSON)) ?? + OpenClawMotionActivityParams() + let payload = try await self.motionService.activities(params: params) + let json = try Self.encodePayload(payload) + return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json) + case OpenClawMotionCommand.pedometer.rawValue: + let params = (try? Self.decodeParams(OpenClawPedometerParams.self, from: req.paramsJSON)) ?? 
/// Handles the push-to-talk node commands (`pttStart`, `pttStop`, `pttCancel`, `pttOnce`).
///
/// Voice wake is suspended while push-to-talk owns audio capture; the result of the
/// suspend call is remembered in `pttVoiceWakeSuspended` and handed back to
/// `resumeAfterExternalAudioCapture(wasSuspended:)` when the capture ends.
///
/// - Throws: errors from `talkMode.beginPushToTalk()`, `talkMode.runPushToTalkOnce()`
///   or `encodePayload`.
private func handleTalkInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse {
    // Gives audio capture back to voice wake and clears the bookkeeping flag.
    func resumeVoiceWake() {
        self.voiceWake.resumeAfterExternalAudioCapture(wasSuspended: self.pttVoiceWakeSuspended)
        self.pttVoiceWakeSuspended = false
    }

    switch req.command {
    case OpenClawTalkCommand.pttStart.rawValue:
        self.pttVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture()
        // If push-to-talk fails to start, no stop/cancel is guaranteed to follow, so undo the
        // suspension here instead of leaving voice wake suspended (mirrors the `defer` in pttOnce).
        var didBegin = false
        defer {
            if !didBegin { resumeVoiceWake() }
        }
        let payload = try await self.talkMode.beginPushToTalk()
        didBegin = true
        let json = try Self.encodePayload(payload)
        return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json)

    case OpenClawTalkCommand.pttStop.rawValue:
        let payload = await self.talkMode.endPushToTalk()
        resumeVoiceWake()
        let json = try Self.encodePayload(payload)
        return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json)

    case OpenClawTalkCommand.pttCancel.rawValue:
        let payload = await self.talkMode.cancelPushToTalk()
        resumeVoiceWake()
        let json = try Self.encodePayload(payload)
        return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json)

    case OpenClawTalkCommand.pttOnce.rawValue:
        self.pttVoiceWakeSuspended = self.voiceWake.suspendForExternalAudioCapture()
        // One-shot capture: always resume, whether the run succeeds or throws.
        defer { resumeVoiceWake() }
        let payload = try await self.talkMode.runPushToTalkOnce()
        let json = try Self.encodePayload(payload)
        return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: json)

    default:
        return BridgeInvokeResponse(
            id: req.id,
            ok: false,
            error: OpenClawNodeError(code: .invalidRequest, message: "INVALID_REQUEST: unknown command"))
    }
}
in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleSystemNotify(req) + } + + register([OpenClawChatCommand.push.rawValue]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleChatPushInvoke(req) + } + + register([ + OpenClawDeviceCommand.status.rawValue, + OpenClawDeviceCommand.info.rawValue, + ]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleDeviceInvoke(req) + } + + register([OpenClawPhotosCommand.latest.rawValue]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handlePhotosInvoke(req) + } + + register([ + OpenClawContactsCommand.search.rawValue, + OpenClawContactsCommand.add.rawValue, + ]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleContactsInvoke(req) + } + + register([ + OpenClawCalendarCommand.events.rawValue, + OpenClawCalendarCommand.add.rawValue, + ]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleCalendarInvoke(req) + } + + register([ + OpenClawRemindersCommand.list.rawValue, + OpenClawRemindersCommand.add.rawValue, + ]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleRemindersInvoke(req) + } + + register([ + OpenClawMotionCommand.activity.rawValue, + OpenClawMotionCommand.pedometer.rawValue, + ]) { [weak self] req in + guard let self else { throw NodeCapabilityRouter.RouterError.handlerUnavailable } + return try await self.handleMotionInvoke(req) + } + + register([ + OpenClawTalkCommand.pttStart.rawValue, + OpenClawTalkCommand.pttStop.rawValue, + 
/// Records the connect configuration and starts both gateway sessions (operator + node).
///
/// The caller-supplied stable id is stored trimmed (possibly empty) in
/// `activeGatewayConnectConfig`; the session loops and `prepareForGatewayConnect`
/// receive an effective id that falls back to `url.absoluteString` when the trimmed id is empty.
/// When `tls` is provided, both loops share one pinning session box.
func connectToGateway(
    url: URL,
    gatewayStableID: String,
    tls: GatewayTLSParams?,
    token: String?,
    password: String?,
    connectOptions: GatewayConnectOptions)
{
    let trimmedID = gatewayStableID.trimmingCharacters(in: .whitespacesAndNewlines)

    let persistenceID: String
    if trimmedID.isEmpty {
        persistenceID = url.absoluteString
    } else {
        persistenceID = trimmedID
    }

    let sharedSession: WebSocketSessionBox?
    if let tls {
        sharedSession = WebSocketSessionBox(session: GatewayTLSPinningSession(params: tls))
    } else {
        sharedSession = nil
    }

    let config = GatewayConnectConfig(
        url: url,
        stableID: trimmedID,
        tls: tls,
        token: token,
        password: password,
        nodeOptions: connectOptions)
    self.activeGatewayConnectConfig = config

    self.prepareForGatewayConnect(url: url, stableID: persistenceID)

    self.startOperatorGatewayLoop(
        url: url,
        stableID: persistenceID,
        token: token,
        password: password,
        nodeOptions: connectOptions,
        sessionBox: sharedSession)

    self.startNodeGatewayLoop(
        url: url,
        stableID: persistenceID,
        token: token,
        password: password,
        nodeOptions: connectOptions,
        sessionBox: sharedSession)
}
/// Tears down the current gateway connection and resets connection-related state.
///
/// Disables auto-reconnect, cancels the node, operator and voice-wake-sync tasks, stops the
/// health monitor, asks both gateway sessions to disconnect (asynchronously, in a detached
/// `Task`), and then resets the published connection fields to their offline values.
func disconnectGateway() {
    // Turn off auto-reconnect before cancelling the loops.
    self.gatewayAutoReconnectEnabled = false
    self.nodeGatewayTask?.cancel()
    self.nodeGatewayTask = nil
    self.operatorGatewayTask?.cancel()
    self.operatorGatewayTask = nil
    self.voiceWakeSyncTask?.cancel()
    self.voiceWakeSyncTask = nil
    self.gatewayHealthMonitor.stop()
    // The session disconnects are async; the state below is reset without waiting for them.
    Task {
        await self.operatorGateway.disconnect()
        await self.nodeGateway.disconnect()
    }
    self.gatewayStatusText = "Offline"
    self.gatewayServerName = nil
    self.gatewayRemoteAddress = nil
    self.connectedGatewayID = nil
    self.activeGatewayConnectConfig = nil
    self.gatewayConnected = false
    self.operatorConnected = false
    self.talkMode.updateGatewayConnected(false)
    self.seamColorHex = nil
    // Reset the base key first; `mainSessionKey` is read right after and pushed to talk mode.
    // NOTE(review): presumably `mainSessionKey` derives from `mainSessionBaseKey` — confirm.
    self.mainSessionBaseKey = "main"
    self.talkMode.updateMainSessionKey(self.mainSessionKey)
    self.showLocalCanvasOnDisconnect()
}
+ self.operatorGatewayTask = Task { [weak self] in + guard let self else { return } + var attempt = 0 + while !Task.isCancelled { + if await self.isOperatorConnected() { + try? await Task.sleep(nanoseconds: 1_000_000_000) + continue + } + + let effectiveClientId = + GatewaySettingsStore.loadGatewayClientIdOverride(stableID: stableID) ?? nodeOptions.clientId + let operatorOptions = self.makeOperatorConnectOptions( + clientId: effectiveClientId, + displayName: nodeOptions.clientDisplayName) + + do { + try await self.operatorGateway.connect( + url: url, + token: token, + password: password, + connectOptions: operatorOptions, + sessionBox: sessionBox, + onConnected: { [weak self] in + guard let self else { return } + await MainActor.run { + self.operatorConnected = true + self.talkMode.updateGatewayConnected(true) + } + GatewayDiagnostics.log( + "operator gateway connected host=\(url.host ?? "?") scheme=\(url.scheme ?? "?")") + await self.refreshBrandingFromGateway() + await self.refreshAgentsFromGateway() + await self.startVoiceWakeSync() + await MainActor.run { self.startGatewayHealthMonitor() } + }, + onDisconnected: { [weak self] reason in + guard let self else { return } + await MainActor.run { + self.operatorConnected = false + self.talkMode.updateGatewayConnected(false) + } + GatewayDiagnostics.log("operator gateway disconnected reason=\(reason)") + await MainActor.run { self.stopGatewayHealthMonitor() } + }, + onInvoke: { req in + // Operator session should not handle node.invoke requests. + BridgeInvokeResponse( + id: req.id, + ok: false, + error: OpenClawNodeError( + code: .invalidRequest, + message: "INVALID_REQUEST: operator session cannot invoke node commands")) + }) + + attempt = 0 + try? await Task.sleep(nanoseconds: 1_000_000_000) + } catch { + attempt += 1 + GatewayDiagnostics.log("operator gateway connect error: \(error.localizedDescription)") + let sleepSeconds = min(8.0, 0.5 * pow(1.7, Double(attempt))) + try? 
/// Starts the node-session connect/reconnect loop and stores it in `nodeGatewayTask`.
///
/// The loop polls once per second while connected; otherwise it attempts to connect,
/// backing off on failure (0.5s * 1.7^attempt, capped at 8s). If the gateway rejects the
/// client id (see `legacyClientIdFallback`), it retries once with the legacy id and persists
/// that override for `stableID`.
///
/// - Parameters:
///   - url: Gateway WebSocket URL.
///   - stableID: Effective stable id used as the persistence key for the client-id override.
///   - token: Optional gateway token.
///   - password: Optional gateway password.
///   - nodeOptions: Connect options for the node role; the client id may be replaced by the fallback.
///   - sessionBox: Optional shared (TLS-pinning) WebSocket session.
func startNodeGatewayLoop(
    url: URL,
    stableID: String,
    token: String?,
    password: String?,
    nodeOptions: GatewayConnectOptions,
    sessionBox: WebSocketSessionBox?)
{
    self.nodeGatewayTask = Task { [weak self] in
        guard let self else { return }
        var attempt = 0
        var currentOptions = nodeOptions
        var didFallbackClientId = false

        while !Task.isCancelled {
            if await self.isGatewayConnected() {
                try? await Task.sleep(nanoseconds: 1_000_000_000)
                continue
            }
            await MainActor.run {
                self.gatewayStatusText = (attempt == 0) ? "Connecting…" : "Reconnecting…"
                self.gatewayServerName = nil
                self.gatewayRemoteAddress = nil
            }

            do {
                let epochMs = Int(Date().timeIntervalSince1970 * 1000)
                GatewayDiagnostics.log("connect attempt epochMs=\(epochMs) url=\(url.absoluteString)")
                try await self.nodeGateway.connect(
                    url: url,
                    token: token,
                    password: password,
                    connectOptions: currentOptions,
                    sessionBox: sessionBox,
                    onConnected: { [weak self] in
                        guard let self else { return }
                        await MainActor.run {
                            self.gatewayStatusText = "Connected"
                            self.gatewayServerName = url.host ?? "gateway"
                            self.gatewayConnected = true
                            self.screen.errorText = nil
                            UserDefaults.standard.set(true, forKey: "gateway.autoconnect")
                        }
                        GatewayDiagnostics.log(
                            "gateway connected host=\(url.host ?? "?") scheme=\(url.scheme ?? "?")")
                        if let addr = await self.nodeGateway.currentRemoteAddress() {
                            await MainActor.run { self.gatewayRemoteAddress = addr }
                        }
                        await self.showA2UIOnConnectIfNeeded()
                    },
                    onDisconnected: { [weak self] reason in
                        guard let self else { return }
                        await MainActor.run {
                            self.gatewayStatusText = "Disconnected: \(reason)"
                            self.gatewayServerName = nil
                            self.gatewayRemoteAddress = nil
                            self.gatewayConnected = false
                            self.showLocalCanvasOnDisconnect()
                        }
                        GatewayDiagnostics.log("gateway disconnected reason: \(reason)")
                    },
                    onInvoke: { [weak self] req in
                        guard let self else {
                            return BridgeInvokeResponse(
                                id: req.id,
                                ok: false,
                                error: OpenClawNodeError(
                                    code: .unavailable,
                                    message: "UNAVAILABLE: node not ready"))
                        }
                        return await self.handleInvoke(req)
                    })

                attempt = 0
                try? await Task.sleep(nanoseconds: 1_000_000_000)
            } catch {
                // A cancelled loop must not report an error or schedule another retry.
                if Task.isCancelled { break }
                if !didFallbackClientId,
                   let fallbackClientId = self.legacyClientIdFallback(
                       currentClientId: currentOptions.clientId,
                       error: error)
                {
                    // Retry immediately (no backoff) with the legacy client id, at most once.
                    didFallbackClientId = true
                    currentOptions.clientId = fallbackClientId
                    GatewaySettingsStore.saveGatewayClientIdOverride(
                        stableID: stableID,
                        clientId: fallbackClientId)
                    await MainActor.run { self.gatewayStatusText = "Gateway rejected client id. Retrying…" }
                    continue
                }

                attempt += 1
                await MainActor.run {
                    self.gatewayStatusText = "Gateway error: \(error.localizedDescription)"
                    self.gatewayServerName = nil
                    self.gatewayRemoteAddress = nil
                    self.gatewayConnected = false
                    self.showLocalCanvasOnDisconnect()
                }
                GatewayDiagnostics.log("gateway connect error: \(error.localizedDescription)")
                let sleepSeconds = min(8.0, 0.5 * pow(1.7, Double(attempt)))
                try? await Task.sleep(nanoseconds: UInt64(sleepSeconds * 1_000_000_000))
            }
        }

        await MainActor.run {
            // `prepareForGatewayConnect` cancels the previous loop and re-enables auto-reconnect
            // before a new loop starts, while `disconnectGateway` disables it. If auto-reconnect
            // is enabled here, this loop was superseded by a newer connect: leave the fields the
            // new attempt owns (`connectedGatewayID`, status text) alone so they are not clobbered.
            // NOTE(review): assumes no other code path cancels this task while auto-reconnect
            // stays enabled — confirm against the rest of NodeAppModel.
            let supersededByNewConnect = self.gatewayAutoReconnectEnabled
            if !supersededByNewConnect {
                self.gatewayStatusText = "Offline"
                self.connectedGatewayID = nil
            }
            self.gatewayServerName = nil
            self.gatewayRemoteAddress = nil
            self.gatewayConnected = false
            self.operatorConnected = false
            self.talkMode.updateGatewayConnected(false)
            self.seamColorHex = nil
            self.mainSessionBaseKey = "main"
            self.talkMode.updateMainSessionKey(self.mainSessionKey)
            self.showLocalCanvasOnDisconnect()
        }
    }
}
/// CoreMotion-backed implementation of `MotionServicing` (activity history + pedometer totals).
final class MotionService: MotionServicing {
    /// Returns the most recent motion activities in the requested range.
    ///
    /// - Parameter params: Optional ISO-8601 `startISO`/`endISO` (see `resolveRange`) and an
    ///   optional `limit` (default 200, clamped to 1...1000). When more activities exist than
    ///   `limit`, the latest ones are kept.
    /// - Throws: an `NSError` in domain "Motion" when activity data is unavailable (code 1) or
    ///   not authorized (code 3), or the error reported by the CoreMotion query.
    func activities(params: OpenClawMotionActivityParams) async throws -> OpenClawMotionActivityPayload {
        guard CMMotionActivityManager.isActivityAvailable() else {
            throw NSError(domain: "Motion", code: 1, userInfo: [
                NSLocalizedDescriptionKey: "MOTION_UNAVAILABLE: activity not supported on this device",
            ])
        }
        let auth = CMMotionActivityManager.authorizationStatus()
        guard auth == .authorized else {
            throw NSError(domain: "Motion", code: 3, userInfo: [
                NSLocalizedDescriptionKey: "MOTION_PERMISSION_REQUIRED: grant Motion & Fitness permission",
            ])
        }

        let (start, end) = Self.resolveRange(startISO: params.startISO, endISO: params.endISO)
        let limit = max(1, min(params.limit ?? 200, 1000))

        let manager = CMMotionActivityManager()
        let mapped = try await withCheckedThrowingContinuation { (cont: CheckedContinuation<[OpenClawMotionActivityEntry], Error>) in
            manager.queryActivityStarting(from: start, to: end, to: OperationQueue()) { activity, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                let formatter = ISO8601DateFormatter()
                let sliced = Array((activity ?? []).suffix(limit))
                let entries = sliced.enumerated().map { index, entry in
                    // A CMMotionActivity only carries a start date; it lasts until the next
                    // activity begins. Only the final entry runs to the end of the queried range.
                    let nextIndex = index + 1
                    let entryEnd = nextIndex < sliced.count ? sliced[nextIndex].startDate : end
                    return OpenClawMotionActivityEntry(
                        startISO: formatter.string(from: entry.startDate),
                        endISO: formatter.string(from: entryEnd),
                        confidence: Self.confidenceString(entry.confidence),
                        isWalking: entry.walking,
                        isRunning: entry.running,
                        isCycling: entry.cycling,
                        isAutomotive: entry.automotive,
                        isStationary: entry.stationary,
                        isUnknown: entry.unknown)
                }
                cont.resume(returning: entries)
            }
        }

        return OpenClawMotionActivityPayload(activities: mapped)
    }

    /// Returns pedometer totals (steps, distance, floors) for the requested range.
    ///
    /// - Parameter params: Optional ISO-8601 `startISO`/`endISO` (see `resolveRange`).
    /// - Throws: an `NSError` in domain "Motion" when step counting is unavailable (code 2) or
    ///   not authorized (code 4), or the error reported by the pedometer query.
    func pedometer(params: OpenClawPedometerParams) async throws -> OpenClawPedometerPayload {
        guard CMPedometer.isStepCountingAvailable() else {
            throw NSError(domain: "Motion", code: 2, userInfo: [
                NSLocalizedDescriptionKey: "PEDOMETER_UNAVAILABLE: step counting not supported",
            ])
        }
        let auth = CMPedometer.authorizationStatus()
        guard auth == .authorized else {
            throw NSError(domain: "Motion", code: 4, userInfo: [
                NSLocalizedDescriptionKey: "MOTION_PERMISSION_REQUIRED: grant Motion & Fitness permission",
            ])
        }

        let (start, end) = Self.resolveRange(startISO: params.startISO, endISO: params.endISO)
        let pedometer = CMPedometer()
        let payload = try await withCheckedThrowingContinuation { (cont: CheckedContinuation<OpenClawPedometerPayload, Error>) in
            pedometer.queryPedometerData(from: start, to: end) { data, error in
                if let error {
                    cont.resume(throwing: error)
                    return
                }
                let formatter = ISO8601DateFormatter()
                // Missing data yields nil fields rather than an error.
                let payload = OpenClawPedometerPayload(
                    startISO: formatter.string(from: start),
                    endISO: formatter.string(from: end),
                    steps: data?.numberOfSteps.intValue,
                    distanceMeters: data?.distance?.doubleValue,
                    floorsAscended: data?.floorsAscended?.intValue,
                    floorsDescended: data?.floorsDescended?.intValue)
                cont.resume(returning: payload)
            }
        }
        return payload
    }

    /// Parses the optional ISO-8601 bounds; an absent or unparsable start falls back to the start
    /// of the current day, and an absent or unparsable end falls back to now.
    private static func resolveRange(startISO: String?, endISO: String?) -> (Date, Date) {
        let formatter = ISO8601DateFormatter()
        let start = startISO.flatMap { formatter.date(from: $0) } ?? Calendar.current.startOfDay(for: Date())
        let end = endISO.flatMap { formatter.date(from: $0) } ?? Date()
        return (start, end)
    }

    /// Maps CoreMotion's confidence enum onto the string used in the payload.
    private static func confidenceString(_ confidence: CMMotionActivityConfidence) -> String {
        switch confidence {
        case .low: "low"
        case .medium: "medium"
        case .high: "high"
        @unknown default: "unknown"
        }
    }
}
/// Picks the gateway to auto-connect to, if any.
///
/// Preference order: the stored preferred stable id, then the last discovered stable id
/// (both compared after trimming whitespace, and skipped when empty); failing both, the sole
/// discovered gateway when exactly one is known. Returns `nil` otherwise.
private func autoCandidate() -> GatewayDiscoveryModel.DiscoveredGateway? {
    let wantedIDs = [
        self.preferredGatewayStableID.trimmingCharacters(in: .whitespacesAndNewlines),
        self.lastDiscoveredGatewayStableID.trimmingCharacters(in: .whitespacesAndNewlines),
    ]

    for wantedID in wantedIDs where !wantedID.isEmpty {
        if let hit = self.gatewayController.gateways.first(where: { $0.stableID == wantedID }) {
            return hit
        }
    }

    guard self.gatewayController.gateways.count == 1 else { return nil }
    return self.gatewayController.gateways.first
}
/// Validates the manual form, persists it, and asks the controller to connect.
///
/// Sets `connectStatusText` and returns early when the host is empty or the port is invalid.
/// An empty port field means "no explicit port" and is passed on as `0`.
private func connectManual() async {
    let host = self.manualHost.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !host.isEmpty else {
        self.connectStatusText = "Failed: host required"
        return
    }

    // Parse the port once. `manualPortValue()` returns nil both for an empty field and for text
    // it cannot turn into an Int, so check the raw text to tell the two apart: only an empty
    // field may fall back to the default port.
    let portText = self.manualPortText.trimmingCharacters(in: .whitespacesAndNewlines)
    let parsedPort = self.manualPortValue()
    if let parsedPort {
        guard (1...65535).contains(parsedPort) else {
            self.connectStatusText = "Failed: invalid port"
            return
        }
    } else if !portText.isEmpty {
        self.connectStatusText = "Failed: invalid port"
        return
    }
    let port = parsedPort ?? 0

    let defaults = UserDefaults.standard
    defaults.set(true, forKey: "gateway.manual.enabled")
    defaults.set(host, forKey: "gateway.manual.host")
    defaults.set(port, forKey: "gateway.manual.port")
    defaults.set(self.manualUseTLS, forKey: "gateway.manual.tls")

    if let instanceId = defaults.string(forKey: "node.instanceId")?.trimmingCharacters(in: .whitespacesAndNewlines),
       !instanceId.isEmpty
    {
        let trimmedToken = self.manualToken.trimmingCharacters(in: .whitespacesAndNewlines)
        let trimmedPassword = self.manualPassword.trimmingCharacters(in: .whitespacesAndNewlines)
        if !trimmedToken.isEmpty {
            GatewaySettingsStore.saveGatewayToken(trimmedToken, instanceId: instanceId)
        }
        // NOTE(review): the password is saved even when empty, unlike the token — presumably to
        // clear a stored password; confirm this asymmetry is intended.
        GatewaySettingsStore.saveGatewayPassword(trimmedPassword, instanceId: instanceId)
    }

    // "manual" marks the in-flight attempt so the button shows progress and is disabled.
    self.connectingGatewayID = "manual"
    defer { self.connectingGatewayID = nil }
    await self.gatewayController.connectManual(
        host: host,
        port: port,
        useTLS: self.manualUseTLS)
}
/// Parses the pasted setup code and copies its contents into the manual-entry fields.
///
/// Accepted forms, in order of precedence:
/// 1. a payload (JSON or base64-encoded JSON) with a `url` that has a host,
/// 2. a payload with a non-empty `host` (plus optional `port` / `tls`),
/// 3. the pasted text itself when it is a URL with a scheme and a host.
///
/// `setupStatusText` reports the outcome; the form is left untouched on failure.
private func applySetupCode() {
    let raw = self.setupCode.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !raw.isEmpty else {
        self.setupStatusText = "Paste a setup code to continue."
        return
    }

    // Do not bail out when decoding fails: a bare URL is not a payload, and the URL fallback
    // below must still get a chance to run.
    let decoded = self.decodeSetupPayload(raw: raw)
    let payload = decoded ?? SetupPayload()

    var endpointApplied = false
    if let urlString = payload.url, let url = URL(string: urlString) {
        endpointApplied = self.applyURL(url)
    }
    if !endpointApplied,
       let host = payload.host?.trimmingCharacters(in: .whitespacesAndNewlines),
       !host.isEmpty
    {
        self.manualHost = host
        self.manualPortText = payload.port.map { String($0) } ?? ""
        if let tls = payload.tls {
            self.manualUseTLS = tls
        }
        endpointApplied = true
    }
    if !endpointApplied, let url = URL(string: raw), url.scheme != nil {
        endpointApplied = self.applyURL(url)
    }

    guard endpointApplied else {
        self.setupStatusText = decoded == nil
            ? "Setup code not recognized."
            : "Setup code missing URL or host."
        return
    }

    if let token = payload.token?.trimmingCharacters(in: .whitespacesAndNewlines), !token.isEmpty {
        self.manualToken = token
    }
    if let password = payload.password?.trimmingCharacters(in: .whitespacesAndNewlines), !password.isEmpty {
        self.manualPassword = password
    }

    self.setupStatusText = "Setup code applied."
}

/// Fills host, port and TLS fields from `url`.
///
/// - Returns: `false` (leaving the form untouched) when the URL has no host, `true` otherwise.
///   The TLS toggle is only changed for `ws`/`wss`/`http`/`https` schemes.
@discardableResult
private func applyURL(_ url: URL) -> Bool {
    guard let host = url.host, !host.isEmpty else { return false }
    self.manualHost = host
    if let port = url.port {
        self.manualPortText = String(port)
    } else {
        self.manualPortText = ""
    }
    let scheme = (url.scheme ?? "").lowercased()
    if scheme == "wss" || scheme == "https" {
        self.manualUseTLS = true
    } else if scheme == "ws" || scheme == "http" {
        self.manualUseTLS = false
    }
    return true
}
/// Decodes a base64 or base64url string into UTF-8 text.
///
/// Surrounding whitespace is ignored, the URL-safe characters `-` and `_` are mapped to
/// `+` and `/`, and missing `=` padding is added. Returns `nil` for empty input, invalid
/// base64, or bytes that are not valid UTF-8.
private func decodeBase64Payload(_ raw: String) -> String? {
    var candidate = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if candidate.isEmpty {
        return nil
    }

    // Translate the URL-safe alphabet to the standard one.
    candidate = candidate.replacingOccurrences(of: "-", with: "+")
    candidate = candidate.replacingOccurrences(of: "_", with: "/")

    // Pad up to the next multiple of four characters.
    let remainder = candidate.count % 4
    if remainder != 0 {
        candidate += String(repeating: "=", count: 4 - remainder)
    }

    guard let bytes = Data(base64Encoded: candidate) else {
        return nil
    }
    return String(data: bytes, encoding: .utf8)
}
"—")") + return lines + } +} diff --git a/apps/ios/Sources/Reminders/RemindersService.swift b/apps/ios/Sources/Reminders/RemindersService.swift new file mode 100644 index 0000000000..36eea52217 --- /dev/null +++ b/apps/ios/Sources/Reminders/RemindersService.swift @@ -0,0 +1,165 @@ +import EventKit +import Foundation +import OpenClawKit + +final class RemindersService: RemindersServicing { + func list(params: OpenClawRemindersListParams) async throws -> OpenClawRemindersListPayload { + let store = EKEventStore() + let status = EKEventStore.authorizationStatus(for: .reminder) + let authorized = await Self.ensureAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Reminders", code: 1, userInfo: [ + NSLocalizedDescriptionKey: "REMINDERS_PERMISSION_REQUIRED: grant Reminders permission", + ]) + } + + let limit = max(1, min(params.limit ?? 50, 500)) + let statusFilter = params.status ?? .incomplete + + let predicate = store.predicateForReminders(in: nil) + let payload = try await withCheckedThrowingContinuation { (cont: CheckedContinuation<[OpenClawReminderPayload], Error>) in + store.fetchReminders(matching: predicate) { items in + let formatter = ISO8601DateFormatter() + let filtered = (items ?? 
[]).filter { reminder in + switch statusFilter { + case .all: + return true + case .completed: + return reminder.isCompleted + case .incomplete: + return !reminder.isCompleted + } + } + let selected = Array(filtered.prefix(limit)) + let payload = selected.map { reminder in + let due = reminder.dueDateComponents.flatMap { Calendar.current.date(from: $0) } + return OpenClawReminderPayload( + identifier: reminder.calendarItemIdentifier, + title: reminder.title, + dueISO: due.map { formatter.string(from: $0) }, + completed: reminder.isCompleted, + listName: reminder.calendar.title) + } + cont.resume(returning: payload) + } + } + + return OpenClawRemindersListPayload(reminders: payload) + } + + func add(params: OpenClawRemindersAddParams) async throws -> OpenClawRemindersAddPayload { + let store = EKEventStore() + let status = EKEventStore.authorizationStatus(for: .reminder) + let authorized = await Self.ensureWriteAuthorization(store: store, status: status) + guard authorized else { + throw NSError(domain: "Reminders", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "REMINDERS_PERMISSION_REQUIRED: grant Reminders permission", + ]) + } + + let title = params.title.trimmingCharacters(in: .whitespacesAndNewlines) + guard !title.isEmpty else { + throw NSError(domain: "Reminders", code: 3, userInfo: [ + NSLocalizedDescriptionKey: "REMINDERS_INVALID: title required", + ]) + } + + let reminder = EKReminder(eventStore: store) + reminder.title = title + if let notes = params.notes?.trimmingCharacters(in: .whitespacesAndNewlines), !notes.isEmpty { + reminder.notes = notes + } + reminder.calendar = try Self.resolveList( + store: store, + listId: params.listId, + listName: params.listName) + + if let dueISO = params.dueISO?.trimmingCharacters(in: .whitespacesAndNewlines), !dueISO.isEmpty { + let formatter = ISO8601DateFormatter() + guard let dueDate = formatter.date(from: dueISO) else { + throw NSError(domain: "Reminders", code: 4, userInfo: [ + NSLocalizedDescriptionKey: 
"REMINDERS_INVALID: dueISO must be ISO-8601", + ]) + } + reminder.dueDateComponents = Calendar.current.dateComponents( + [.year, .month, .day, .hour, .minute, .second], + from: dueDate) + } + + try store.save(reminder, commit: true) + + let formatter = ISO8601DateFormatter() + let due = reminder.dueDateComponents.flatMap { Calendar.current.date(from: $0) } + let payload = OpenClawReminderPayload( + identifier: reminder.calendarItemIdentifier, + title: reminder.title, + dueISO: due.map { formatter.string(from: $0) }, + completed: reminder.isCompleted, + listName: reminder.calendar.title) + + return OpenClawRemindersAddPayload(reminder: payload) + } + + private static func ensureAuthorization(store: EKEventStore, status: EKAuthorizationStatus) async -> Bool { + switch status { + case .authorized: + return true + case .notDetermined: + // Don’t prompt during node.invoke; prompts block the invoke and lead to timeouts. + return false + case .restricted, .denied: + return false + case .fullAccess: + return true + case .writeOnly: + return false + @unknown default: + return false + } + } + + private static func ensureWriteAuthorization(store: EKEventStore, status: EKAuthorizationStatus) async -> Bool { + switch status { + case .authorized, .fullAccess, .writeOnly: + return true + case .notDetermined: + // Don’t prompt during node.invoke; prompts block the invoke and lead to timeouts. + return false + case .restricted, .denied: + return false + @unknown default: + return false + } + } + + private static func resolveList( + store: EKEventStore, + listId: String?, + listName: String?) 
throws -> EKCalendar + { + if let id = listId?.trimmingCharacters(in: .whitespacesAndNewlines), !id.isEmpty, + let calendar = store.calendar(withIdentifier: id) + { + return calendar + } + + if let title = listName?.trimmingCharacters(in: .whitespacesAndNewlines), !title.isEmpty { + if let calendar = store.calendars(for: .reminder).first(where: { + $0.title.compare(title, options: [.caseInsensitive, .diacriticInsensitive]) == .orderedSame + }) { + return calendar + } + throw NSError(domain: "Reminders", code: 5, userInfo: [ + NSLocalizedDescriptionKey: "REMINDERS_LIST_NOT_FOUND: no list named \(title)", + ]) + } + + if let fallback = store.defaultCalendarForNewReminders() { + return fallback + } + + throw NSError(domain: "Reminders", code: 6, userInfo: [ + NSLocalizedDescriptionKey: "REMINDERS_LIST_NOT_FOUND: no default list", + ]) + } +} diff --git a/apps/ios/Sources/RootCanvas.swift b/apps/ios/Sources/RootCanvas.swift index 93cb816273..d3da84cae8 100644 --- a/apps/ios/Sources/RootCanvas.swift +++ b/apps/ios/Sources/RootCanvas.swift @@ -9,9 +9,15 @@ struct RootCanvas: View { @AppStorage(VoiceWakePreferences.enabledKey) private var voiceWakeEnabled: Bool = false @AppStorage("screen.preventSleep") private var preventSleep: Bool = true @AppStorage("canvas.debugStatusEnabled") private var canvasDebugStatusEnabled: Bool = false + @AppStorage("gateway.onboardingComplete") private var onboardingComplete: Bool = false + @AppStorage("gateway.hasConnectedOnce") private var hasConnectedOnce: Bool = false + @AppStorage("gateway.preferredStableID") private var preferredGatewayStableID: String = "" + @AppStorage("gateway.manual.enabled") private var manualGatewayEnabled: Bool = false + @AppStorage("gateway.manual.host") private var manualGatewayHost: String = "" @State private var presentedSheet: PresentedSheet? @State private var voiceWakeToastText: String? @State private var toastDismissTask: Task? 
+ @State private var didAutoOpenSettings: Bool = false private enum PresentedSheet: Identifiable { case settings @@ -52,12 +58,14 @@ struct RootCanvas: View { SettingsTab() case .chat: ChatSheet( - gateway: self.appModel.gatewaySession, + gateway: self.appModel.operatorSession, sessionKey: self.appModel.mainSessionKey, + agentName: self.appModel.activeAgentName, userAccent: self.appModel.seamColor) } } .onAppear { self.updateIdleTimer() } + .onAppear { self.maybeAutoOpenSettings() } .onChange(of: self.preventSleep) { _, _ in self.updateIdleTimer() } .onChange(of: self.scenePhase) { _, _ in self.updateIdleTimer() } .onAppear { self.updateCanvasDebugStatus() } @@ -65,6 +73,13 @@ struct RootCanvas: View { .onChange(of: self.appModel.gatewayStatusText) { _, _ in self.updateCanvasDebugStatus() } .onChange(of: self.appModel.gatewayServerName) { _, _ in self.updateCanvasDebugStatus() } .onChange(of: self.appModel.gatewayRemoteAddress) { _, _ in self.updateCanvasDebugStatus() } + .onChange(of: self.appModel.gatewayServerName) { _, newValue in + if newValue != nil { + self.onboardingComplete = true + self.hasConnectedOnce = true + } + self.maybeAutoOpenSettings() + } .onChange(of: self.voiceWake.lastTriggeredCommand) { _, newValue in guard let newValue else { return } let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) @@ -119,12 +134,33 @@ struct RootCanvas: View { let subtitle = self.appModel.gatewayServerName ?? 
self.appModel.gatewayRemoteAddress self.appModel.screen.updateDebugStatus(title: title, subtitle: subtitle) } + + private func shouldAutoOpenSettings() -> Bool { + if self.appModel.gatewayServerName != nil { return false } + if !self.hasConnectedOnce { return true } + if !self.onboardingComplete { return true } + return !self.hasExistingGatewayConfig() + } + + private func hasExistingGatewayConfig() -> Bool { + if GatewaySettingsStore.loadLastGatewayConnection() != nil { return true } + let manualHost = self.manualGatewayHost.trimmingCharacters(in: .whitespacesAndNewlines) + return self.manualGatewayEnabled && !manualHost.isEmpty + } + + private func maybeAutoOpenSettings() { + guard !self.didAutoOpenSettings else { return } + guard self.shouldAutoOpenSettings() else { return } + self.didAutoOpenSettings = true + self.presentedSheet = .settings + } } private struct CanvasContent: View { @Environment(NodeAppModel.self) private var appModel @AppStorage("talk.enabled") private var talkEnabled: Bool = false @AppStorage("talk.button.enabled") private var talkButtonEnabled: Bool = true + @State private var showGatewayActions: Bool = false var systemColorScheme: ColorScheme var gatewayStatus: StatusPill.GatewayState var voiceWakeEnabled: Bool @@ -182,7 +218,11 @@ private struct CanvasContent: View { activity: self.statusActivity, brighten: self.brightenButtons, onTap: { - self.openSettings() + if self.gatewayStatus == .connected { + self.showGatewayActions = true + } else { + self.openSettings() + } }) .padding(.leading, 10) .safeAreaPadding(.top, 10) @@ -197,6 +237,21 @@ private struct CanvasContent: View { .transition(.move(edge: .top).combined(with: .opacity)) } } + .confirmationDialog( + "Gateway", + isPresented: self.$showGatewayActions, + titleVisibility: .visible) + { + Button("Disconnect", role: .destructive) { + self.appModel.disconnectGateway() + } + Button("Open Settings") { + self.openSettings() + } + Button("Cancel", role: .cancel) {} + } message: { + 
Text("Disconnect from the gateway?") + } } private var statusActivity: StatusPill.Activity? { @@ -248,6 +303,10 @@ private struct CanvasContent: View { return StatusPill.Activity(title: "Mic permission", systemImage: "mic.slash", tint: .orange) } if voiceStatus == "Paused" { + // Talk mode intentionally pauses voice wake to release the mic. Don't spam the HUD for that case. + if self.appModel.talkMode.isEnabled { + return nil + } let suffix = self.appModel.isBackgrounded ? " (background)" : "" return StatusPill.Activity(title: "Voice Wake paused\(suffix)", systemImage: "pause.circle.fill") } diff --git a/apps/ios/Sources/RootTabs.swift b/apps/ios/Sources/RootTabs.swift index f7b3fd8226..278e56d615 100644 --- a/apps/ios/Sources/RootTabs.swift +++ b/apps/ios/Sources/RootTabs.swift @@ -7,6 +7,7 @@ struct RootTabs: View { @State private var selectedTab: Int = 0 @State private var voiceWakeToastText: String? @State private var toastDismissTask: Task? + @State private var showGatewayActions: Bool = false var body: some View { TabView(selection: self.$selectedTab) { @@ -27,7 +28,13 @@ struct RootTabs: View { gateway: self.gatewayStatus, voiceWakeEnabled: self.voiceWakeEnabled, activity: self.statusActivity, - onTap: { self.selectedTab = 2 }) + onTap: { + if self.gatewayStatus == .connected { + self.showGatewayActions = true + } else { + self.selectedTab = 2 + } + }) .padding(.leading, 10) .safeAreaPadding(.top, 10) } @@ -62,6 +69,21 @@ struct RootTabs: View { self.toastDismissTask?.cancel() self.toastDismissTask = nil } + .confirmationDialog( + "Gateway", + isPresented: self.$showGatewayActions, + titleVisibility: .visible) + { + Button("Disconnect", role: .destructive) { + self.appModel.disconnectGateway() + } + Button("Open Settings") { + self.selectedTab = 2 + } + Button("Cancel", role: .cancel) {} + } message: { + Text("Disconnect from the gateway?") + } } private var gatewayStatus: StatusPill.GatewayState { @@ -133,6 +155,10 @@ struct RootTabs: View { return 
StatusPill.Activity(title: "Mic permission", systemImage: "mic.slash", tint: .orange)
             }
             if voiceStatus == "Paused" {
+                // Talk mode intentionally pauses voice wake to release the mic. Don't spam the HUD for that case.
+                if self.appModel.talkMode.isEnabled {
+                    return nil
+                }
                 let suffix = self.appModel.isBackgrounded ? " (background)" : ""
                 return StatusPill.Activity(title: "Voice Wake paused\(suffix)", systemImage: "pause.circle.fill")
             }
diff --git a/apps/ios/Sources/RootView.swift b/apps/ios/Sources/RootView.swift
new file mode 100644
index 0000000000..b028186533
--- /dev/null
+++ b/apps/ios/Sources/RootView.swift
@@ -0,0 +1,7 @@
+import SwiftUI
+
+/// Root view of the app; simply hosts `RootCanvas`.
+struct RootView: View {
+    var body: some View {
+        RootCanvas()
+    }
+}
diff --git a/apps/ios/Sources/Screen/ScreenController.swift b/apps/ios/Sources/Screen/ScreenController.swift
index 3fe13a0c98..506b78a230 100644
--- a/apps/ios/Sources/Screen/ScreenController.swift
+++ b/apps/ios/Sources/Screen/ScreenController.swift
@@ -52,6 +52,20 @@ final class ScreenController {
     func navigate(to urlString: String) {
         let trimmed = urlString.trimmingCharacters(in: .whitespacesAndNewlines)
+        if trimmed.isEmpty {
+            self.urlString = ""
+            self.reload()
+            return
+        }
+        if let url = URL(string: trimmed),
+           !url.isFileURL,
+           let host = url.host,
+           Self.isLoopbackHost(host)
+        {
+            // Never try to load loopback URLs from a remote gateway.
+            self.showDefaultCanvas()
+            return
+        }
         self.urlString = (trimmed == "/" ? "" : trimmed)
         self.reload()
     }
@@ -239,6 +253,18 @@ final class ScreenController {
             name: "scaffold",
             ext: "html",
             subdirectory: "CanvasScaffold")
+
+    /// Reports whether `host` refers to this device rather than a remote machine.
+    ///
+    /// Treated as loopback: an empty host, `localhost` and any `*.localhost` name, the IPv4
+    /// `127.` prefix and `0.0.0.0`, the IPv6 forms `::1`, `0:0:0:0:0:0:0:1` and `::`, and
+    /// IPv4-mapped `::ffff:127.x`. Surrounding brackets and trailing dots are ignored, so
+    /// `[::1]` and `localhost.` are caught too.
+    private static func isLoopbackHost(_ host: String) -> Bool {
+        var normalized = host.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
+        // Accept a bracketed IPv6 literal in case the caller passes the authority form.
+        if normalized.hasPrefix("["), normalized.hasSuffix("]") {
+            normalized = String(normalized.dropFirst().dropLast())
+        }
+        // A fully-qualified name may end in a dot ("localhost.") and still resolve locally.
+        while normalized.hasSuffix(".") {
+            normalized.removeLast()
+        }
+        if normalized.isEmpty { return true }
+        if normalized == "localhost" || normalized.hasSuffix(".localhost") {
+            return true
+        }
+        if normalized == "::1" || normalized == "0:0:0:0:0:0:0:1" || normalized == "::" || normalized == "0.0.0.0" {
+            return true
+        }
+        if normalized.hasPrefix("127.") || normalized.hasPrefix("::ffff:127.") {
+            return true
+        }
+        return false
+    }
     func isTrustedCanvasUIURL(_ url: URL) -> Bool {
         guard url.isFileURL else { return false }
         let std = url.standardizedFileURL
diff --git a/apps/ios/Sources/Screen/ScreenTab.swift b/apps/ios/Sources/Screen/ScreenTab.swift
index fd3d0276d3..16b5f85749 100644
--- a/apps/ios/Sources/Screen/ScreenTab.swift
+++ b/apps/ios/Sources/Screen/ScreenTab.swift
@@ -9,7 +9,9 @@ struct ScreenTab: View {
             ScreenWebView(controller: self.appModel.screen)
                 .ignoresSafeArea()
                 .overlay(alignment: .top) {
-                    if let errorText = self.appModel.screen.errorText {
+                    if let errorText = self.appModel.screen.errorText,
+                       self.appModel.gatewayServerName == nil
+                    {
                         Text(errorText)
                             .font(.footnote)
                             .padding(10)
diff --git a/apps/ios/Sources/Services/NodeServiceProtocols.swift b/apps/ios/Sources/Services/NodeServiceProtocols.swift
new file mode 100644
index 0000000000..002c87ad9c
--- /dev/null
+++ b/apps/ios/Sources/Services/NodeServiceProtocols.swift
@@ -0,0 +1,64 @@
+import CoreLocation
+import Foundation
+import OpenClawKit
+import UIKit
+
+/// Camera access: device enumeration plus still (`snap`) and video (`clip`) capture,
+/// each returning the media as base64 text together with its format and metadata.
+protocol CameraServicing: Sendable {
+    func listDevices() async -> [CameraController.CameraDeviceInfo]
+    func snap(params: OpenClawCameraSnapParams) async throws -> (format: String, base64: String, width: Int, height: Int)
+    func clip(params: OpenClawCameraClipParams) async throws -> (format: String, base64: String, durationMs: Int, hasAudio: Bool)
+}
+
+/// Screen recording. Every option is optional; the returned string is presumably the path of
+/// the recorded file — TODO confirm against `ScreenRecordService`.
+protocol ScreenRecordingServicing: Sendable {
+    func record(
+        screenIndex: Int?,
+        durationMs: Int?,
+        fps: Double?,
+        includeAudio: Bool?,
+        outPath: String?) async throws -> String
+}
+
+/// Location access (authorization state and one-shot location fixes), isolated to the main actor.
+@MainActor
+protocol LocationServicing: Sendable {
+    func authorizationStatus() -> CLAuthorizationStatus
+    func accuracyAuthorization() -> CLAccuracyAuthorization
+    func ensureAuthorization(mode: OpenClawLocationMode) async -> CLAuthorizationStatus
+    func currentLocation(
+        params: OpenClawLocationGetParams,
+        desiredAccuracy: OpenClawLocationAccuracy,
+        maxAgeMs: Int?,
+        timeoutMs: Int?) async throws -> CLLocation
+}
+
+/// Device status and device info payloads.
+protocol DeviceStatusServicing: Sendable {
+    func status() async throws -> OpenClawDeviceStatusPayload
+    func info() -> OpenClawDeviceInfoPayload
+}
+
+/// Photo library access: fetches the latest photos.
+protocol PhotosServicing: Sendable {
+    func latest(params: OpenClawPhotosLatestParams) async throws -> OpenClawPhotosLatestPayload
+}
+
+/// Contacts access: search and add.
+protocol ContactsServicing: Sendable {
+    func search(params: OpenClawContactsSearchParams) async throws -> OpenClawContactsSearchPayload
+    func add(params: OpenClawContactsAddParams) async throws -> OpenClawContactsAddPayload
+}
+
+/// Calendar access: query events and add an event.
+protocol CalendarServicing: Sendable {
+    func events(params: OpenClawCalendarEventsParams) async throws -> OpenClawCalendarEventsPayload
+    func add(params: OpenClawCalendarAddParams) async throws -> OpenClawCalendarAddPayload
+}
+
+/// Reminders access: list reminders and add a reminder.
+protocol RemindersServicing: Sendable {
+    func list(params: OpenClawRemindersListParams) async throws -> OpenClawRemindersListPayload
+    func add(params: OpenClawRemindersAddParams) async throws -> OpenClawRemindersAddPayload
+}
+
+/// Motion data: activity history and pedometer readings.
+protocol MotionServicing: Sendable {
+    func activities(params: OpenClawMotionActivityParams) async throws -> OpenClawMotionActivityPayload
+    func pedometer(params: OpenClawPedometerParams) async throws -> OpenClawPedometerPayload
+}
+
+// The existing concrete services adopt the protocols as-is.
+extension CameraController: CameraServicing {}
+extension ScreenRecordService: ScreenRecordingServicing {}
+extension LocationService: LocationServicing {}
diff --git a/apps/ios/Sources/Services/NotificationService.swift
b/apps/ios/Sources/Services/NotificationService.swift
new file mode 100644
index 0000000000..348e93edc6
--- /dev/null
+++ b/apps/ios/Sources/Services/NotificationService.swift
@@ -0,0 +1,58 @@
+import Foundation
+import UserNotifications
+
+/// `Sendable` mirror of the notification authorization states this app distinguishes.
+enum NotificationAuthorizationStatus: Sendable {
+    case notDetermined
+    case denied
+    case authorized
+    case provisional
+    case ephemeral
+}
+
+/// The slice of `UNUserNotificationCenter` the app depends on, expressed with async functions.
+protocol NotificationCentering: Sendable {
+    func authorizationStatus() async -> NotificationAuthorizationStatus
+    func requestAuthorization(options: UNAuthorizationOptions) async throws -> Bool
+    func add(_ request: UNNotificationRequest) async throws
+}
+
+/// `NotificationCentering` backed by a real `UNUserNotificationCenter` (the current one by default).
+struct LiveNotificationCenter: NotificationCentering, @unchecked Sendable {
+    private let center: UNUserNotificationCenter
+
+    init(center: UNUserNotificationCenter = .current()) {
+        self.center = center
+    }
+
+    /// Maps the system authorization status onto `NotificationAuthorizationStatus`;
+    /// statuses unknown to this build are reported as `.denied`.
+    func authorizationStatus() async -> NotificationAuthorizationStatus {
+        let settings = await self.center.notificationSettings()
+        return switch settings.authorizationStatus {
+        case .authorized:
+            .authorized
+        case .provisional:
+            .provisional
+        case .ephemeral:
+            .ephemeral
+        case .denied:
+            .denied
+        case .notDetermined:
+            .notDetermined
+        @unknown default:
+            .denied
+        }
+    }
+
+    /// Forwards the authorization request to the underlying center.
+    func requestAuthorization(options: UNAuthorizationOptions) async throws -> Bool {
+        try await self.center.requestAuthorization(options: options)
+    }
+
+    /// Schedules `request`, bridging the completion-handler API to `async throws`.
+    func add(_ request: UNNotificationRequest) async throws {
+        // NOTE(review): `CheckedContinuation` appears here without generic arguments (the resume
+        // calls imply Void / Error) — confirm against the real source file.
+        try await withCheckedThrowingContinuation { (cont: CheckedContinuation) in
+            self.center.add(request) { error in
+                if let error {
+                    cont.resume(throwing: error)
+                } else {
+                    cont.resume(returning: ())
+                }
+            }
+        }
+    }
+}
diff --git a/apps/ios/Sources/SessionKey.swift b/apps/ios/Sources/SessionKey.swift
index bac73f670d..89798b6a29 100644
--- a/apps/ios/Sources/SessionKey.swift
+++ b/apps/ios/Sources/SessionKey.swift
@@ -6,6 +6,14 @@ enum SessionKey {
         return trimmed.isEmpty ? "main" : trimmed
+    }
+
+    /// Builds the session key for an agent-scoped session.
+    ///
+    /// Both inputs are whitespace-trimmed and an empty base key becomes `"main"`. With an empty
+    /// agent id the base key is returned as-is; otherwise the result is `agent:<agentId>:<baseKey>`.
+    static func makeAgentSessionKey(agentId: String, baseKey: String) -> String {
+        let agent = agentId.trimmingCharacters(in: .whitespacesAndNewlines)
+        let base = baseKey.trimmingCharacters(in: .whitespacesAndNewlines)
+        let effectiveBase = base.isEmpty ? "main" : base
+        guard !agent.isEmpty else { return effectiveBase }
+        return "agent:\(agent):\(effectiveBase)"
+    }
+
     static func isCanonicalMainSessionKey(_ value: String?) -> Bool {
         let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
         if trimmed.isEmpty { return false }
diff --git a/apps/ios/Sources/Settings/SettingsTab.swift b/apps/ios/Sources/Settings/SettingsTab.swift
index c1ee609948..6267f621c5 100644
--- a/apps/ios/Sources/Settings/SettingsTab.swift
+++ b/apps/ios/Sources/Settings/SettingsTab.swift
@@ -1,17 +1,10 @@
 import OpenClawKit
 import Network
 import Observation
+import os
 import SwiftUI
 import UIKit
-@MainActor
-@Observable
-private final class ConnectStatusStore {
-    var text: String?
-} - -extension ConnectStatusStore: @unchecked Sendable {} - struct SettingsTab: View { @Environment(NodeAppModel.self) private var appModel: NodeAppModel @Environment(VoiceWakeManager.self) private var voiceWake: VoiceWakeManager @@ -28,99 +21,140 @@ struct SettingsTab: View { @AppStorage("screen.preventSleep") private var preventSleep: Bool = true @AppStorage("gateway.preferredStableID") private var preferredGatewayStableID: String = "" @AppStorage("gateway.lastDiscoveredStableID") private var lastDiscoveredGatewayStableID: String = "" + @AppStorage("gateway.autoconnect") private var gatewayAutoConnect: Bool = false @AppStorage("gateway.manual.enabled") private var manualGatewayEnabled: Bool = false @AppStorage("gateway.manual.host") private var manualGatewayHost: String = "" @AppStorage("gateway.manual.port") private var manualGatewayPort: Int = 18789 @AppStorage("gateway.manual.tls") private var manualGatewayTLS: Bool = true @AppStorage("gateway.discovery.debugLogs") private var discoveryDebugLogsEnabled: Bool = false @AppStorage("canvas.debugStatusEnabled") private var canvasDebugStatusEnabled: Bool = false - @State private var connectStatus = ConnectStatusStore() @State private var connectingGatewayID: String? @State private var localIPAddress: String? @State private var lastLocationModeRaw: String = OpenClawLocationMode.off.rawValue @State private var gatewayToken: String = "" @State private var gatewayPassword: String = "" + @AppStorage("gateway.setupCode") private var setupCode: String = "" + @State private var setupStatusText: String? 
+ @State private var manualGatewayPortText: String = "" + @State private var gatewayExpanded: Bool = true + @State private var selectedAgentPickerId: String = "" + + private let gatewayLogger = Logger(subsystem: "ai.openclaw.ios", category: "GatewaySettings") var body: some View { NavigationStack { Form { - Section("Node") { - TextField("Name", text: self.$displayName) - Text(self.instanceId) - .font(.footnote) - .foregroundStyle(.secondary) - LabeledContent("IP", value: self.localIPAddress ?? "—") - .contextMenu { - if let ip = self.localIPAddress { - Button { - UIPasteboard.general.string = ip - } label: { - Label("Copy", systemImage: "doc.on.doc") + Section { + DisclosureGroup(isExpanded: self.$gatewayExpanded) { + if !self.isGatewayConnected { + Text( + "1. Open Telegram and message your bot: /pair\n" + + "2. Copy the setup code it returns\n" + + "3. Paste here and tap Connect\n" + + "4. Back in Telegram, run /pair approve") + .font(.footnote) + .foregroundStyle(.secondary) + + if let warning = self.tailnetWarningText { + Text(warning) + .font(.footnote.weight(.semibold)) + .foregroundStyle(.orange) + } + + TextField("Paste setup code", text: self.$setupCode) + .textInputAutocapitalization(.never) + .autocorrectionDisabled() + + Button { + Task { await self.applySetupCodeAndConnect() } + } label: { + if self.connectingGatewayID == "manual" { + HStack(spacing: 8) { + ProgressView() + .progressViewStyle(.circular) + Text("Connecting…") + } + } else { + Text("Connect with setup code") } } + .disabled(self.connectingGatewayID != nil + || self.setupCode.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + + if let status = self.setupStatusLine { + Text(status) + .font(.footnote) + .foregroundStyle(.secondary) + } } - LabeledContent("Platform", value: self.platformString()) - LabeledContent("Version", value: self.appVersion()) - LabeledContent("Model", value: self.modelIdentifier()) - } - Section("Gateway") { - LabeledContent("Discovery", value: 
self.gatewayController.discoveryStatusText) - LabeledContent("Status", value: self.appModel.gatewayStatusText) - if let serverName = self.appModel.gatewayServerName { - LabeledContent("Server", value: serverName) - if let addr = self.appModel.gatewayRemoteAddress { - let parts = Self.parseHostPort(from: addr) - let urlString = Self.httpURLString(host: parts?.host, port: parts?.port, fallback: addr) - LabeledContent("Address") { - Text(urlString) - } - .contextMenu { - Button { - UIPasteboard.general.string = urlString - } label: { - Label("Copy URL", systemImage: "doc.on.doc") + if self.isGatewayConnected { + Picker("Bot", selection: self.$selectedAgentPickerId) { + Text("Default").tag("") + let defaultId = (self.appModel.gatewayDefaultAgentId ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + ForEach(self.appModel.gatewayAgents.filter { $0.id != defaultId }, id: \.id) { agent in + let name = (agent.name ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + Text(name.isEmpty ? 
agent.id : name).tag(agent.id) } + } + Text("Controls which bot Chat and Talk speak to.") + .font(.footnote) + .foregroundStyle(.secondary) + } - if let parts { + DisclosureGroup("Advanced") { + if self.appModel.gatewayServerName == nil { + LabeledContent("Discovery", value: self.gatewayController.discoveryStatusText) + } + LabeledContent("Status", value: self.appModel.gatewayStatusText) + Toggle("Auto-connect on launch", isOn: self.$gatewayAutoConnect) + + if let serverName = self.appModel.gatewayServerName { + LabeledContent("Server", value: serverName) + if let addr = self.appModel.gatewayRemoteAddress { + let parts = Self.parseHostPort(from: addr) + let urlString = Self.httpURLString(host: parts?.host, port: parts?.port, fallback: addr) + LabeledContent("Address") { + Text(urlString) + } + .contextMenu { Button { - UIPasteboard.general.string = parts.host + UIPasteboard.general.string = urlString } label: { - Label("Copy Host", systemImage: "doc.on.doc") + Label("Copy URL", systemImage: "doc.on.doc") } - Button { - UIPasteboard.general.string = "\(parts.port)" - } label: { - Label("Copy Port", systemImage: "doc.on.doc") + if let parts { + Button { + UIPasteboard.general.string = parts.host + } label: { + Label("Copy Host", systemImage: "doc.on.doc") + } + + Button { + UIPasteboard.general.string = "\(parts.port)" + } label: { + Label("Copy Port", systemImage: "doc.on.doc") + } } } } + + Button("Disconnect", role: .destructive) { + self.appModel.disconnectGateway() + } + } else { + self.gatewayList(showing: .all) } - Button("Disconnect", role: .destructive) { - self.appModel.disconnectGateway() - } - - self.gatewayList(showing: .availableOnly) - } else { - self.gatewayList(showing: .all) - } - - if let text = self.connectStatus.text { - Text(text) - .font(.footnote) - .foregroundStyle(.secondary) - } - - DisclosureGroup("Advanced") { Toggle("Use Manual Gateway", isOn: self.$manualGatewayEnabled) TextField("Host", text: self.$manualGatewayHost) 
.textInputAutocapitalization(.never) .autocorrectionDisabled() - TextField("Port", value: self.$manualGatewayPort, format: .number) + TextField("Port (optional)", text: self.manualPortBinding) .keyboardType(.numberPad) Toggle("Use TLS", isOn: self.$manualGatewayTLS) @@ -140,11 +174,11 @@ struct SettingsTab: View { } .disabled(self.connectingGatewayID != nil || self.manualGatewayHost .trimmingCharacters(in: .whitespacesAndNewlines) - .isEmpty || self.manualGatewayPort <= 0 || self.manualGatewayPort > 65535) + .isEmpty || !self.manualPortIsValid) Text( "Use this when mDNS/Bonjour discovery is blocked. " - + "The gateway WebSocket listens on port 18789 by default.") + + "Leave port empty for 443 on tailnet DNS (TLS) or 18789 otherwise.") .font(.footnote) .foregroundStyle(.secondary) @@ -164,58 +198,98 @@ struct SettingsTab: View { .autocorrectionDisabled() SecureField("Gateway Password", text: self.$gatewayPassword) + + VStack(alignment: .leading, spacing: 6) { + Text("Debug") + .font(.footnote.weight(.semibold)) + .foregroundStyle(.secondary) + Text(self.gatewayDebugText()) + .font(.system(size: 12, weight: .regular, design: .monospaced)) + .foregroundStyle(.secondary) + .frame(maxWidth: .infinity, alignment: .leading) + .padding(10) + .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 10, style: .continuous)) + } } - } - - Section("Voice") { - Toggle("Voice Wake", isOn: self.$voiceWakeEnabled) - .onChange(of: self.voiceWakeEnabled) { _, newValue in - self.appModel.setVoiceWakeEnabled(newValue) - } - Toggle("Talk Mode", isOn: self.$talkEnabled) - .onChange(of: self.talkEnabled) { _, newValue in - self.appModel.setTalkEnabled(newValue) - } - // Keep this separate so users can hide the side bubble without disabling Talk Mode. 
- Toggle("Show Talk Button", isOn: self.$talkButtonEnabled) - - NavigationLink { - VoiceWakeWordsSettingsView() } label: { - LabeledContent( - "Wake Words", - value: VoiceWakePreferences.displayString(for: self.voiceWake.triggerWords)) + HStack(spacing: 10) { + Circle() + .fill(self.isGatewayConnected ? Color.green : Color.secondary.opacity(0.35)) + .frame(width: 10, height: 10) + Text("Gateway") + Spacer() + Text(self.gatewaySummaryText) + .font(.footnote) + .foregroundStyle(.secondary) + } } } - Section("Camera") { - Toggle("Allow Camera", isOn: self.$cameraEnabled) - Text("Allows the gateway to request photos or short video clips (foreground only).") - .font(.footnote) - .foregroundStyle(.secondary) - } + Section("Device") { + DisclosureGroup("Features") { + Toggle("Voice Wake", isOn: self.$voiceWakeEnabled) + .onChange(of: self.voiceWakeEnabled) { _, newValue in + self.appModel.setVoiceWakeEnabled(newValue) + } + Toggle("Talk Mode", isOn: self.$talkEnabled) + .onChange(of: self.talkEnabled) { _, newValue in + self.appModel.setTalkEnabled(newValue) + } + // Keep this separate so users can hide the side bubble without disabling Talk Mode. 
+ Toggle("Show Talk Button", isOn: self.$talkButtonEnabled) - Section("Location") { - Picker("Location Access", selection: self.$locationEnabledModeRaw) { - Text("Off").tag(OpenClawLocationMode.off.rawValue) - Text("While Using").tag(OpenClawLocationMode.whileUsing.rawValue) - Text("Always").tag(OpenClawLocationMode.always.rawValue) + NavigationLink { + VoiceWakeWordsSettingsView() + } label: { + LabeledContent( + "Wake Words", + value: VoiceWakePreferences.displayString(for: self.voiceWake.triggerWords)) + } + + Toggle("Allow Camera", isOn: self.$cameraEnabled) + Text("Allows the gateway to request photos or short video clips (foreground only).") + .font(.footnote) + .foregroundStyle(.secondary) + + Picker("Location Access", selection: self.$locationEnabledModeRaw) { + Text("Off").tag(OpenClawLocationMode.off.rawValue) + Text("While Using").tag(OpenClawLocationMode.whileUsing.rawValue) + Text("Always").tag(OpenClawLocationMode.always.rawValue) + } + .pickerStyle(.segmented) + + Toggle("Precise Location", isOn: self.$locationPreciseEnabled) + .disabled(self.locationMode == .off) + + Text("Always requires system permission and may prompt to open Settings.") + .font(.footnote) + .foregroundStyle(.secondary) + + Toggle("Prevent Sleep", isOn: self.$preventSleep) + Text("Keeps the screen awake while OpenClaw is open.") + .font(.footnote) + .foregroundStyle(.secondary) } - .pickerStyle(.segmented) - Toggle("Precise Location", isOn: self.$locationPreciseEnabled) - .disabled(self.locationMode == .off) - - Text("Always requires system permission and may prompt to open Settings.") - .font(.footnote) - .foregroundStyle(.secondary) - } - - Section("Screen") { - Toggle("Prevent Sleep", isOn: self.$preventSleep) - Text("Keeps the screen awake while OpenClaw is open.") - .font(.footnote) - .foregroundStyle(.secondary) + DisclosureGroup("Device Info") { + TextField("Name", text: self.$displayName) + Text(self.instanceId) + .font(.footnote) + .foregroundStyle(.secondary) + 
LabeledContent("IP", value: self.localIPAddress ?? "—") + .contextMenu { + if let ip = self.localIPAddress { + Button { + UIPasteboard.general.string = ip + } label: { + Label("Copy", systemImage: "doc.on.doc") + } + } + } + LabeledContent("Platform", value: self.platformString()) + LabeledContent("Version", value: self.appVersion()) + LabeledContent("Model", value: self.modelIdentifier()) + } } } .navigationTitle("Settings") @@ -232,11 +306,24 @@ struct SettingsTab: View { .onAppear { self.localIPAddress = Self.primaryIPv4Address() self.lastLocationModeRaw = self.locationEnabledModeRaw + self.syncManualPortText() let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) if !trimmedInstanceId.isEmpty { self.gatewayToken = GatewaySettingsStore.loadGatewayToken(instanceId: trimmedInstanceId) ?? "" self.gatewayPassword = GatewaySettingsStore.loadGatewayPassword(instanceId: trimmedInstanceId) ?? "" } + // Keep setup front-and-center when disconnected; keep things compact once connected. + self.gatewayExpanded = !self.isGatewayConnected + self.selectedAgentPickerId = self.appModel.selectedAgentId ?? "" + } + .onChange(of: self.selectedAgentPickerId) { _, newValue in + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + self.appModel.setSelectedAgentId(trimmed.isEmpty ? nil : trimmed) + } + .onChange(of: self.appModel.selectedAgentId ?? 
"") { _, newValue in + if newValue != self.selectedAgentPickerId { + self.selectedAgentPickerId = newValue + } } .onChange(of: self.preferredGatewayStableID) { _, newValue in let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) @@ -255,8 +342,24 @@ struct SettingsTab: View { guard !instanceId.isEmpty else { return } GatewaySettingsStore.saveGatewayPassword(trimmed, instanceId: instanceId) } - .onChange(of: self.appModel.gatewayServerName) { _, _ in - self.connectStatus.text = nil + .onChange(of: self.manualGatewayPort) { _, _ in + self.syncManualPortText() + } + .onChange(of: self.appModel.gatewayServerName) { _, newValue in + if newValue != nil { + self.setupCode = "" + self.setupStatusText = nil + return + } + if self.manualGatewayEnabled { + self.setupStatusText = self.appModel.gatewayStatusText + } + } + .onChange(of: self.appModel.gatewayStatusText) { _, newValue in + guard self.manualGatewayEnabled || self.connectingGatewayID == "manual" else { return } + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + self.setupStatusText = trimmed } .onChange(of: self.locationEnabledModeRaw) { _, newValue in let previous = self.lastLocationModeRaw @@ -278,8 +381,24 @@ struct SettingsTab: View { @ViewBuilder private func gatewayList(showing: GatewayListMode) -> some View { if self.gatewayController.gateways.isEmpty { - Text("No gateways found yet.") - .foregroundStyle(.secondary) + VStack(alignment: .leading, spacing: 12) { + Text("No gateways found yet.") + .foregroundStyle(.secondary) + Text("If your gateway is on another network, connect it and ensure DNS is working.") + .font(.footnote) + .foregroundStyle(.secondary) + + if let lastKnown = GatewaySettingsStore.loadLastGatewayConnection() { + Button { + Task { await self.connectLastKnown() } + } label: { + self.lastKnownButtonLabel(host: lastKnown.host, port: lastKnown.port) + } + .disabled(self.connectingGatewayID != nil) + 
.buttonStyle(.borderedProminent) + .tint(self.appModel.seamColor) + } + } } else { let connectedID = self.appModel.connectedGatewayID let rows = self.gatewayController.gateways.filter { gateway in @@ -331,6 +450,20 @@ struct SettingsTab: View { case availableOnly } + private var isGatewayConnected: Bool { + let status = self.appModel.gatewayStatusText.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if status.contains("connected") { return true } + return self.appModel.gatewayServerName != nil && !status.contains("offline") + } + + private var gatewaySummaryText: String { + if let server = self.appModel.gatewayServerName, self.isGatewayConnected { + return server + } + let trimmed = self.appModel.gatewayStatusText.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? "Not connected" : trimmed + } + private func platformString() -> String { let v = ProcessInfo.processInfo.operatingSystemVersion return "iOS \(v.majorVersion).\(v.minorVersion).\(v.patchVersion)" @@ -377,14 +510,290 @@ struct SettingsTab: View { await self.gatewayController.connect(gateway) } + private func connectLastKnown() async { + self.connectingGatewayID = "last-known" + defer { self.connectingGatewayID = nil } + await self.gatewayController.connectLastKnown() + } + + private func gatewayDebugText() -> String { + var lines: [String] = [ + "gateway: \(self.appModel.gatewayStatusText)", + "discovery: \(self.gatewayController.discoveryStatusText)", + ] + lines.append("server: \(self.appModel.gatewayServerName ?? "—")") + lines.append("address: \(self.appModel.gatewayRemoteAddress ?? 
"—")") + if let last = self.gatewayController.discoveryDebugLog.last?.message { + lines.append("discovery log: \(last)") + } + return lines.joined(separator: "\n") + } + + @ViewBuilder + private func lastKnownButtonLabel(host: String, port: Int) -> some View { + if self.connectingGatewayID == "last-known" { + HStack(spacing: 8) { + ProgressView() + .progressViewStyle(.circular) + Text("Connecting…") + } + .frame(maxWidth: .infinity) + } else { + HStack(spacing: 8) { + Image(systemName: "bolt.horizontal.circle.fill") + VStack(alignment: .leading, spacing: 2) { + Text("Connect last known") + Text("\(host):\(port)") + .font(.footnote) + .foregroundStyle(.secondary) + } + Spacer() + } + .frame(maxWidth: .infinity) + } + } + + private var manualPortBinding: Binding { + Binding( + get: { self.manualGatewayPortText }, + set: { newValue in + let filtered = newValue.filter(\.isNumber) + if self.manualGatewayPortText != filtered { + self.manualGatewayPortText = filtered + } + if filtered.isEmpty { + if self.manualGatewayPort != 0 { + self.manualGatewayPort = 0 + } + } else if let port = Int(filtered), self.manualGatewayPort != port { + self.manualGatewayPort = port + } + }) + } + + private var manualPortIsValid: Bool { + if self.manualGatewayPortText.isEmpty { return true } + return self.manualGatewayPort >= 1 && self.manualGatewayPort <= 65535 + } + + private func syncManualPortText() { + if self.manualGatewayPort > 0 { + let next = String(self.manualGatewayPort) + if self.manualGatewayPortText != next { + self.manualGatewayPortText = next + } + } else if !self.manualGatewayPortText.isEmpty { + self.manualGatewayPortText = "" + } + } + + private struct SetupPayload: Codable { + var url: String? + var host: String? + var port: Int? + var tls: Bool? + var token: String? + var password: String? 
+ } + + private func applySetupCodeAndConnect() async { + self.setupStatusText = nil + guard self.applySetupCode() else { return } + let host = self.manualGatewayHost.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedPort = self.resolvedManualPort(host: host) + let hasToken = !self.gatewayToken.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + let hasPassword = !self.gatewayPassword.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + GatewayDiagnostics.log( + "setup code applied host=\(host) port=\(resolvedPort ?? -1) tls=\(self.manualGatewayTLS) token=\(hasToken) password=\(hasPassword)") + guard let port = resolvedPort else { + self.setupStatusText = "Failed: invalid port" + return + } + let ok = await self.preflightGateway(host: host, port: port, useTLS: self.manualGatewayTLS) + guard ok else { return } + self.setupStatusText = "Setup code applied. Connecting…" + await self.connectManual() + } + + @discardableResult + private func applySetupCode() -> Bool { + let raw = self.setupCode.trimmingCharacters(in: .whitespacesAndNewlines) + guard !raw.isEmpty else { + self.setupStatusText = "Paste a setup code to continue." + return false + } + + guard let payload = self.decodeSetupPayload(raw: raw) else { + self.setupStatusText = "Setup code not recognized." 
+ return false + } + + if let urlString = payload.url, let url = URL(string: urlString) { + self.applySetupURL(url) + } else if let host = payload.host, !host.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + self.manualGatewayHost = host.trimmingCharacters(in: .whitespacesAndNewlines) + if let port = payload.port { + self.manualGatewayPort = port + self.manualGatewayPortText = String(port) + } else { + self.manualGatewayPort = 0 + self.manualGatewayPortText = "" + } + if let tls = payload.tls { + self.manualGatewayTLS = tls + } + } else if let url = URL(string: raw), url.scheme != nil { + self.applySetupURL(url) + } else { + self.setupStatusText = "Setup code missing URL or host." + return false + } + + let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) + if let token = payload.token, !token.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + let trimmedToken = token.trimmingCharacters(in: .whitespacesAndNewlines) + self.gatewayToken = trimmedToken + if !trimmedInstanceId.isEmpty { + GatewaySettingsStore.saveGatewayToken(trimmedToken, instanceId: trimmedInstanceId) + } + } + if let password = payload.password, !password.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + let trimmedPassword = password.trimmingCharacters(in: .whitespacesAndNewlines) + self.gatewayPassword = trimmedPassword + if !trimmedInstanceId.isEmpty { + GatewaySettingsStore.saveGatewayPassword(trimmedPassword, instanceId: trimmedInstanceId) + } + } + + return true + } + + private func applySetupURL(_ url: URL) { + guard let host = url.host, !host.isEmpty else { return } + self.manualGatewayHost = host + if let port = url.port { + self.manualGatewayPort = port + self.manualGatewayPortText = String(port) + } else { + self.manualGatewayPort = 0 + self.manualGatewayPortText = "" + } + let scheme = (url.scheme ?? 
"").lowercased() + if scheme == "wss" || scheme == "https" { + self.manualGatewayTLS = true + } else if scheme == "ws" || scheme == "http" { + self.manualGatewayTLS = false + } + } + + private func resolvedManualPort(host: String) -> Int? { + if self.manualGatewayPort > 0 { + return self.manualGatewayPort <= 65535 ? self.manualGatewayPort : nil + } + let trimmed = host.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + if self.manualGatewayTLS && trimmed.lowercased().hasSuffix(".ts.net") { + return 443 + } + return 18789 + } + + private func preflightGateway(host: String, port: Int, useTLS: Bool) async -> Bool { + let trimmed = host.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return false } + + if Self.isTailnetHostOrIP(trimmed) && !Self.hasTailnetIPv4() { + let msg = "Tailscale is off on this iPhone. Turn it on, then try again." + self.setupStatusText = msg + GatewayDiagnostics.log("preflight fail: tailnet missing host=\(trimmed)") + self.gatewayLogger.warning("\(msg, privacy: .public)") + return false + } + + self.setupStatusText = "Checking gateway reachability…" + let ok = await Self.probeTCP(host: trimmed, port: port, timeoutSeconds: 3) + if !ok { + let msg = "Can't reach gateway at \(trimmed):\(port). Check Tailscale or LAN." 
+ self.setupStatusText = msg + GatewayDiagnostics.log("preflight fail: unreachable host=\(trimmed) port=\(port)") + self.gatewayLogger.warning("\(msg, privacy: .public)") + return false + } + GatewayDiagnostics.log("preflight ok host=\(trimmed) port=\(port) tls=\(useTLS)") + return true + } + + private static func probeTCP(host: String, port: Int, timeoutSeconds: Double) async -> Bool { + guard let rawPort = UInt16(exactly: port), let nwPort = NWEndpoint.Port(rawValue: rawPort) else { return false } // Failable conversion: an out-of-range Int would trap in UInt16(_:). + let endpointHost = NWEndpoint.Host(host) + let connection = NWConnection(host: endpointHost, port: nwPort, using: .tcp) + return await withCheckedContinuation { cont in + let queue = DispatchQueue(label: "gateway.preflight") + let finished = OSAllocatedUnfairLock(initialState: false) + let finish: @Sendable (Bool) -> Void = { ok in + let shouldResume = finished.withLock { flag -> Bool in + if flag { return false } + flag = true + return true + } + guard shouldResume else { return } + connection.cancel() + cont.resume(returning: ok) + } + connection.stateUpdateHandler = { state in + switch state { + case .ready: + finish(true) + case .failed, .cancelled: + finish(false) + default: + break + } + } + connection.start(queue: queue) + queue.asyncAfter(deadline: .now() + timeoutSeconds) { + finish(false) + } + } + } + + private func decodeSetupPayload(raw: String) -> SetupPayload? { + if let payload = decodeSetupPayloadFromJSON(raw) { + return payload + } + if let decoded = decodeBase64Payload(raw), + let payload = decodeSetupPayloadFromJSON(decoded) + { + return payload + } + return nil + } + + private func decodeSetupPayloadFromJSON(_ json: String) -> SetupPayload? { + guard let data = json.data(using: .utf8) else { return nil } + return try? JSONDecoder().decode(SetupPayload.self, from: data) + } + + private func decodeBase64Payload(_ raw: String) -> String? 
{ + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + let normalized = trimmed + .replacingOccurrences(of: "-", with: "+") + .replacingOccurrences(of: "_", with: "/") + let padding = normalized.count % 4 + let padded = padding == 0 ? normalized : normalized + String(repeating: "=", count: 4 - padding) + guard let data = Data(base64Encoded: padded) else { return nil } + return String(data: data, encoding: .utf8) + } + private func connectManual() async { let host = self.manualGatewayHost.trimmingCharacters(in: .whitespacesAndNewlines) guard !host.isEmpty else { - self.connectStatus.text = "Failed: host required" + self.setupStatusText = "Failed: host required" return } - guard self.manualGatewayPort > 0, self.manualGatewayPort <= 65535 else { - self.connectStatus.text = "Failed: invalid port" + guard self.manualPortIsValid else { + self.setupStatusText = "Failed: invalid port" return } @@ -392,12 +801,54 @@ struct SettingsTab: View { self.manualGatewayEnabled = true defer { self.connectingGatewayID = nil } + GatewayDiagnostics.log( + "connect manual host=\(host) port=\(self.manualGatewayPort) tls=\(self.manualGatewayTLS)") await self.gatewayController.connectManual( host: host, port: self.manualGatewayPort, useTLS: self.manualGatewayTLS) } + private var setupStatusLine: String? { + let trimmedSetup = self.setupStatusText?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let gatewayStatus = self.appModel.gatewayStatusText.trimmingCharacters(in: .whitespacesAndNewlines) + if let friendly = self.friendlyGatewayMessage(from: gatewayStatus) { return friendly } + if let friendly = self.friendlyGatewayMessage(from: trimmedSetup) { return friendly } + if !trimmedSetup.isEmpty { return trimmedSetup } + if gatewayStatus.isEmpty || gatewayStatus == "Offline" { return nil } + return gatewayStatus + } + + private var tailnetWarningText: String? 
{ + let host = self.manualGatewayHost.trimmingCharacters(in: .whitespacesAndNewlines) + guard !host.isEmpty else { return nil } + guard Self.isTailnetHostOrIP(host) else { return nil } + guard !Self.hasTailnetIPv4() else { return nil } + return "This gateway is on your tailnet. Turn on Tailscale on this iPhone, then tap Connect." + } + + private func friendlyGatewayMessage(from raw: String) -> String? { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + let lower = trimmed.lowercased() + if lower.contains("pairing required") { + return "Pairing required. Go back to Telegram and run /pair approve, then tap Connect again." + } + if lower.contains("device nonce required") || lower.contains("device nonce mismatch") { + return "Secure handshake failed. Make sure Tailscale is connected, then tap Connect again." + } + if lower.contains("device signature expired") || lower.contains("device signature invalid") { + return "Secure handshake failed. Check that your iPhone time is correct, then tap Connect again." + } + if lower.contains("connect timed out") || lower.contains("timed out") { + return "Connection timed out. Make sure Tailscale is connected, then try again." + } + if lower.contains("unauthorized role") { + return "Connected, but some controls are restricted for nodes. This is expected." + } + return nil + } + private static func primaryIPv4Address() -> String? { var addrList: UnsafeMutablePointer? guard getifaddrs(&addrList) == 0, let first = addrList else { return nil } @@ -436,6 +887,57 @@ struct SettingsTab: View { return en0 ?? fallback } + private static func hasTailnetIPv4() -> Bool { + var addrList: UnsafeMutablePointer? 
+ guard getifaddrs(&addrList) == 0, let first = addrList else { return false } + defer { freeifaddrs(addrList) } + + for ptr in sequence(first: first, next: { $0.pointee.ifa_next }) { + let flags = Int32(ptr.pointee.ifa_flags) + let isUp = (flags & IFF_UP) != 0 + let isLoopback = (flags & IFF_LOOPBACK) != 0 + let family = ptr.pointee.ifa_addr.pointee.sa_family + if !isUp || isLoopback || family != UInt8(AF_INET) { continue } + + var addr = ptr.pointee.ifa_addr.pointee + var buffer = [CChar](repeating: 0, count: Int(NI_MAXHOST)) + let result = getnameinfo( + &addr, + socklen_t(ptr.pointee.ifa_addr.pointee.sa_len), + &buffer, + socklen_t(buffer.count), + nil, + 0, + NI_NUMERICHOST) + guard result == 0 else { continue } + let len = buffer.prefix { $0 != 0 } + let bytes = len.map { UInt8(bitPattern: $0) } + guard let ip = String(bytes: bytes, encoding: .utf8) else { continue } + if self.isTailnetIPv4(ip) { return true } + } + + return false + } + + private static func isTailnetHostOrIP(_ host: String) -> Bool { + let trimmed = host.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + if trimmed.hasSuffix(".ts.net") || trimmed.hasSuffix(".ts.net.") { + return true + } + return self.isTailnetIPv4(trimmed) + } + + private static func isTailnetIPv4(_ ip: String) -> Bool { + let parts = ip.split(separator: ".") + guard parts.count == 4 else { return false } + let octets = parts.compactMap { Int($0) } + guard octets.count == 4 else { return false } + let a = octets[0] + let b = octets[1] + guard (0...255).contains(a), (0...255).contains(b) else { return false } + return a == 100 && b >= 64 && b <= 127 + } + private static func parseHostPort(from address: String) -> SettingsHostPort? 
{ SettingsNetworkingHelpers.parseHostPort(from: address) } diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift index d3adb49e1b..0400fd2884 100644 --- a/apps/ios/Sources/Voice/TalkModeManager.swift +++ b/apps/ios/Sources/Voice/TalkModeManager.swift @@ -1,4 +1,5 @@ import AVFAudio +import OpenClawChatUI import OpenClawKit import OpenClawProtocol import Foundation @@ -6,6 +7,10 @@ import Observation import OSLog import Speech +// This file intentionally centralizes talk mode state + behavior. +// It's large, and splitting would force `private` -> `fileprivate` across many members. +// We'll refactor into smaller files when the surface stabilizes. +// swiftlint:disable type_body_length @MainActor @Observable final class TalkModeManager: NSObject { @@ -14,9 +19,29 @@ final class TalkModeManager: NSObject { var isEnabled: Bool = false var isListening: Bool = false var isSpeaking: Bool = false + var isPushToTalkActive: Bool = false var statusText: String = "Off" + /// 0..1-ish (not calibrated). Intended for UI feedback only. + var micLevel: Double = 0 + + private enum CaptureMode { + case idle + case continuous + case pushToTalk + } + + private var captureMode: CaptureMode = .idle + private var resumeContinuousAfterPTT: Bool = false + private var activePTTCaptureId: String? + private var pttAutoStopEnabled: Bool = false + private var pttCompletion: CheckedContinuation? + private var pttTimeoutTask: Task? + + private let allowSimulatorCapture: Bool private let audioEngine = AVAudioEngine() + private var inputTapInstalled = false + private var audioTapDiagnostics: AudioTapDiagnostics? private var speechRecognizer: SFSpeechRecognizer? private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionTask: SFSpeechRecognitionTask? @@ -24,6 +49,7 @@ final class TalkModeManager: NSObject { private var lastHeard: Date? 
private var lastTranscript: String = "" + private var loggedPartialThisCycle: Bool = false private var lastSpokenText: String? private var lastInterruptedAtSeconds: Double? @@ -44,21 +70,57 @@ final class TalkModeManager: NSObject { var mp3Player: StreamingAudioPlaying = StreamingAudioPlayer.shared private var gateway: GatewayNodeSession? - private let silenceWindow: TimeInterval = 0.7 + private var gatewayConnected = false + private let silenceWindow: TimeInterval = 0.9 + private var lastAudioActivity: Date? + private var noiseFloorSamples: [Double] = [] + private var noiseFloor: Double? + private var noiseFloorReady: Bool = false private var chatSubscribedSessionKeys = Set() + private var incrementalSpeechQueue: [String] = [] + private var incrementalSpeechTask: Task? + private var incrementalSpeechActive = false + private var incrementalSpeechUsed = false + private var incrementalSpeechLanguage: String? + private var incrementalSpeechBuffer = IncrementalSpeechBuffer() + private var incrementalSpeechContext: IncrementalSpeechContext? + private var incrementalSpeechDirective: TalkDirective? private let logger = Logger(subsystem: "bot.molt", category: "TalkMode") + init(allowSimulatorCapture: Bool = false) { + self.allowSimulatorCapture = allowSimulatorCapture + super.init() + } + func attachGateway(_ gateway: GatewayNodeSession) { self.gateway = gateway } + func updateGatewayConnected(_ connected: Bool) { + self.gatewayConnected = connected + if connected { + // If talk mode is enabled before the gateway connects (common on cold start), + // kick recognition once we're online so the UI doesn’t stay “Offline”. + if self.isEnabled, !self.isListening, self.captureMode != .pushToTalk { + Task { await self.start() } + } + } else { + if self.isEnabled, !self.isSpeaking { + self.statusText = "Offline" + } + } + } + func updateMainSessionKey(_ sessionKey: String?) { let trimmed = (sessionKey ?? 
"").trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { return } - if SessionKey.isCanonicalMainSessionKey(self.mainSessionKey) { return } + if trimmed == self.mainSessionKey { return } self.mainSessionKey = trimmed + if self.gatewayConnected, self.isEnabled { + Task { await self.subscribeChatIfNeeded(sessionKey: trimmed) } + } } func setEnabled(_ enabled: Bool) { @@ -74,26 +136,37 @@ final class TalkModeManager: NSObject { func start() async { guard self.isEnabled else { return } + guard self.captureMode != .pushToTalk else { return } if self.isListening { return } + guard self.gatewayConnected else { + self.statusText = "Offline" + return + } self.logger.info("start") self.statusText = "Requesting permissions…" let micOk = await Self.requestMicrophonePermission() guard micOk else { self.logger.warning("start blocked: microphone permission denied") - self.statusText = "Microphone permission denied" + self.statusText = Self.permissionMessage( + kind: "Microphone", + status: AVAudioSession.sharedInstance().recordPermission) return } let speechOk = await Self.requestSpeechPermission() guard speechOk else { self.logger.warning("start blocked: speech permission denied") - self.statusText = "Speech recognition permission denied" + self.statusText = Self.permissionMessage( + kind: "Speech recognition", + status: SFSpeechRecognizer.authorizationStatus()) return } await self.reloadConfig() do { try Self.configureAudioSession() + // Set this before starting recognition so any early speech errors are classified correctly. 
+ self.captureMode = .continuous try self.startRecognition() self.isListening = true self.statusText = "Listening" @@ -110,6 +183,8 @@ final class TalkModeManager: NSObject { func stop() { self.isEnabled = false self.isListening = false + self.isPushToTalkActive = false + self.captureMode = .idle self.statusText = "Off" self.lastTranscript = "" self.lastHeard = nil @@ -118,6 +193,20 @@ final class TalkModeManager: NSObject { self.stopRecognition() self.stopSpeaking() self.lastInterruptedAtSeconds = nil + let pendingPTT = self.pttCompletion != nil + let pendingCaptureId = self.activePTTCaptureId ?? UUID().uuidString + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = nil + self.pttAutoStopEnabled = false + if pendingPTT { + let payload = OpenClawTalkPTTStopPayload( + captureId: pendingCaptureId, + transcript: nil, + status: "cancelled") + self.finishPTTOnce(payload) + } + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil TalkSystemSpeechSynthesizer.shared.stop() do { try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation]) @@ -127,15 +216,256 @@ final class TalkModeManager: NSObject { Task { await self.unsubscribeAllChats() } } + /// Suspends microphone usage without disabling Talk Mode. + /// Used when the app backgrounds (or when we need to temporarily release the mic). 
+ func suspendForBackground() -> Bool { + guard self.isEnabled else { return false } + let wasActive = self.isListening || self.isSpeaking || self.isPushToTalkActive + + self.isListening = false + self.isPushToTalkActive = false + self.captureMode = .idle + self.statusText = "Paused" + self.lastTranscript = "" + self.lastHeard = nil + self.silenceTask?.cancel() + self.silenceTask = nil + + self.stopRecognition() + self.stopSpeaking() + self.lastInterruptedAtSeconds = nil + TalkSystemSpeechSynthesizer.shared.stop() + + do { + try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation]) + } catch { + self.logger.warning("audio session deactivate failed: \(error.localizedDescription, privacy: .public)") + } + + Task { await self.unsubscribeAllChats() } + return wasActive + } + + func resumeAfterBackground(wasSuspended: Bool) async { + guard wasSuspended else { return } + guard self.isEnabled else { return } + await self.start() + } + func userTappedOrb() { self.stopSpeaking() } + func beginPushToTalk() async throws -> OpenClawTalkPTTStartPayload { + guard self.gatewayConnected else { + self.statusText = "Offline" + throw NSError(domain: "TalkMode", code: 7, userInfo: [ + NSLocalizedDescriptionKey: "Gateway not connected", + ]) + } + if self.isPushToTalkActive, let captureId = self.activePTTCaptureId { + return OpenClawTalkPTTStartPayload(captureId: captureId) + } + + self.stopSpeaking(storeInterruption: false) + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = nil + self.pttAutoStopEnabled = false + + self.resumeContinuousAfterPTT = self.isEnabled && self.captureMode == .continuous + self.silenceTask?.cancel() + self.silenceTask = nil + self.stopRecognition() + self.isListening = false + + let captureId = UUID().uuidString + self.activePTTCaptureId = captureId + self.lastTranscript = "" + self.lastHeard = nil + + self.statusText = "Requesting permissions…" + if !self.allowSimulatorCapture { + let micOk = await 
Self.requestMicrophonePermission() + guard micOk else { + self.statusText = Self.permissionMessage( + kind: "Microphone", + status: AVAudioSession.sharedInstance().recordPermission) + throw NSError(domain: "TalkMode", code: 4, userInfo: [ + NSLocalizedDescriptionKey: "Microphone permission denied", + ]) + } + let speechOk = await Self.requestSpeechPermission() + guard speechOk else { + self.statusText = Self.permissionMessage( + kind: "Speech recognition", + status: SFSpeechRecognizer.authorizationStatus()) + throw NSError(domain: "TalkMode", code: 5, userInfo: [ + NSLocalizedDescriptionKey: "Speech recognition permission denied", + ]) + } + } + + do { + try Self.configureAudioSession() + self.captureMode = .pushToTalk + try self.startRecognition() + self.isListening = true + self.isPushToTalkActive = true + self.statusText = "Listening (PTT)" + } catch { + self.isListening = false + self.isPushToTalkActive = false + self.captureMode = .idle + self.statusText = "Start failed: \(error.localizedDescription)" + throw error + } + + return OpenClawTalkPTTStartPayload(captureId: captureId) + } + + func endPushToTalk() async -> OpenClawTalkPTTStopPayload { + let captureId = self.activePTTCaptureId ?? 
UUID().uuidString + guard self.isPushToTalkActive else { + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: nil, + status: "idle") + self.finishPTTOnce(payload) + return payload + } + + self.isPushToTalkActive = false + self.isListening = false + self.captureMode = .idle + self.stopRecognition() + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = nil + self.pttAutoStopEnabled = false + + let transcript = self.lastTranscript.trimmingCharacters(in: .whitespacesAndNewlines) + self.lastTranscript = "" + self.lastHeard = nil + + guard !transcript.isEmpty else { + self.statusText = "Ready" + if self.resumeContinuousAfterPTT { + await self.start() + } + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: nil, + status: "empty") + self.finishPTTOnce(payload) + return payload + } + + guard self.gatewayConnected else { + self.statusText = "Gateway not connected" + if self.resumeContinuousAfterPTT { + await self.start() + } + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: transcript, + status: "offline") + self.finishPTTOnce(payload) + return payload + } + + self.statusText = "Thinking…" + Task { @MainActor [shouldResume = self.resumeContinuousAfterPTT] in // Snapshot now: the flag is cleared below before this task gets to run. + await self.processTranscript(transcript, restartAfter: shouldResume) + } + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: transcript, + status: "queued") + self.finishPTTOnce(payload) + return payload + } + + func runPushToTalkOnce(maxDurationSeconds: TimeInterval = 12) async throws -> OpenClawTalkPTTStopPayload { + if self.pttCompletion != nil { + _ = await self.cancelPushToTalk() + } + + if self.isPushToTalkActive { + let captureId = self.activePTTCaptureId ?? 
UUID().uuidString + return OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: nil, + status: "busy") + } + + _ = try await self.beginPushToTalk() + + return await withCheckedContinuation { cont in + self.pttCompletion = cont + self.pttAutoStopEnabled = true + self.startSilenceMonitor() + self.schedulePTTTimeout(seconds: maxDurationSeconds) + } + } + + func cancelPushToTalk() async -> OpenClawTalkPTTStopPayload { + let captureId = self.activePTTCaptureId ?? UUID().uuidString + guard self.isPushToTalkActive else { + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: nil, + status: "idle") + self.finishPTTOnce(payload) + self.pttAutoStopEnabled = false + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = nil + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil + return payload + } + + let shouldResume = self.resumeContinuousAfterPTT + self.isPushToTalkActive = false + self.isListening = false + self.captureMode = .idle + self.stopRecognition() + self.lastTranscript = "" + self.lastHeard = nil + self.pttAutoStopEnabled = false + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = nil + self.resumeContinuousAfterPTT = false + self.activePTTCaptureId = nil + self.statusText = "Ready" + + let payload = OpenClawTalkPTTStopPayload( + captureId: captureId, + transcript: nil, + status: "cancelled") + self.finishPTTOnce(payload) + + if shouldResume { + await self.start() + } + return payload + } + private func startRecognition() throws { #if targetEnvironment(simulator) - throw NSError(domain: "TalkMode", code: 2, userInfo: [ - NSLocalizedDescriptionKey: "Talk mode is not supported on the iOS simulator", - ]) + if !self.allowSimulatorCapture { + throw NSError(domain: "TalkMode", code: 2, userInfo: [ + NSLocalizedDescriptionKey: "Talk mode is not supported on the iOS simulator", + ]) + } else { + self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest() + 
self.recognitionRequest?.shouldReportPartialResults = true + return + } #endif self.stopRecognition() @@ -148,57 +478,157 @@ final class TalkModeManager: NSObject { self.recognitionRequest = SFSpeechAudioBufferRecognitionRequest() self.recognitionRequest?.shouldReportPartialResults = true + self.recognitionRequest?.taskHint = .dictation guard let request = self.recognitionRequest else { return } + GatewayDiagnostics.log("talk audio: session \(Self.describeAudioSession())") + let input = self.audioEngine.inputNode - let format = input.outputFormat(forBus: 0) + let format = input.inputFormat(forBus: 0) guard format.sampleRate > 0, format.channelCount > 0 else { throw NSError(domain: "TalkMode", code: 3, userInfo: [ NSLocalizedDescriptionKey: "Invalid audio input format", ]) } input.removeTap(onBus: 0) - let tapBlock = Self.makeAudioTapAppendCallback(request: request) + let tapDiagnostics = AudioTapDiagnostics(label: "talk") { [weak self] level in + guard let self else { return } + Task { @MainActor in + // Smooth + clamp for UI, and keep it cheap. + let raw = max(0, min(Double(level) * 10.0, 1.0)) + let next = (self.micLevel * 0.80) + (raw * 0.20) + self.micLevel = next + + // Dynamic thresholding so background noise doesn’t prevent endpointing. 
+ if self.isListening, !self.isSpeaking, !self.noiseFloorReady { + self.noiseFloorSamples.append(raw) + if self.noiseFloorSamples.count >= 22 { + let sorted = self.noiseFloorSamples.sorted() + let take = max(6, sorted.count / 2) + let slice = sorted.prefix(take) + let avg = slice.reduce(0.0, +) / Double(slice.count) + self.noiseFloor = avg + self.noiseFloorReady = true + self.noiseFloorSamples.removeAll(keepingCapacity: true) + let threshold = min(0.35, max(0.12, avg + 0.10)) + GatewayDiagnostics.log( + "talk audio: noiseFloor=\(String(format: "%.3f", avg)) threshold=\(String(format: "%.3f", threshold))") + } + } + + let threshold: Double = if let floor = self.noiseFloor, self.noiseFloorReady { + min(0.35, max(0.12, floor + 0.10)) + } else { + 0.18 + } + if raw >= threshold { + self.lastAudioActivity = Date() + } + } + } + self.audioTapDiagnostics = tapDiagnostics + let tapBlock = Self.makeAudioTapAppendCallback(request: request, diagnostics: tapDiagnostics) input.installTap(onBus: 0, bufferSize: 2048, format: format, block: tapBlock) + self.inputTapInstalled = true self.audioEngine.prepare() try self.audioEngine.start() + self.loggedPartialThisCycle = false + GatewayDiagnostics.log( + "talk speech: recognition started mode=\(String(describing: self.captureMode)) engineRunning=\(self.audioEngine.isRunning)") self.recognitionTask = recognizer.recognitionTask(with: request) { [weak self] result, error in guard let self else { return } if let error { + let msg = error.localizedDescription + GatewayDiagnostics.log("talk speech: error=\(msg)") if !self.isSpeaking { - self.statusText = "Speech error: \(error.localizedDescription)" + if msg.localizedCaseInsensitiveContains("no speech detected") { + // Treat as transient silence. Don't scare users with an error banner. + self.statusText = self.isEnabled ? 
"Listening" : "Speech error: \(msg)" + } else { + self.statusText = "Speech error: \(msg)" + } + } + self.logger.debug("speech recognition error: \(msg, privacy: .public)") + // Speech recognition can terminate on transient errors (e.g. no speech detected). + // If talk mode is enabled and we're in continuous capture, try to restart. + if self.captureMode == .continuous, self.isEnabled, !self.isSpeaking { + // Treat the task as terminal on error so we don't get stuck with a dead recognizer. + self.stopRecognition() + Task { @MainActor [weak self] in + await self?.restartRecognitionAfterError() + } } - self.logger.debug("speech recognition error: \(error.localizedDescription, privacy: .public)") } guard let result else { return } let transcript = result.bestTranscription.formattedString + if !result.isFinal, !self.loggedPartialThisCycle { + let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmed.isEmpty { + self.loggedPartialThisCycle = true + GatewayDiagnostics.log("talk speech: partial chars=\(trimmed.count)") + } + } Task { @MainActor in await self.handleTranscript(transcript: transcript, isFinal: result.isFinal) } } } + private func restartRecognitionAfterError() async { + guard self.isEnabled, self.captureMode == .continuous else { return } + // Avoid thrashing the audio engine if it’s already running. + if self.recognitionTask != nil, self.audioEngine.isRunning { return } + try? 
await Task.sleep(nanoseconds: 250_000_000) + guard self.isEnabled, self.captureMode == .continuous else { return } + do { + try Self.configureAudioSession() + try self.startRecognition() + self.isListening = true + if self.statusText.localizedCaseInsensitiveContains("speech error") { + self.statusText = "Listening" + } + GatewayDiagnostics.log("talk speech: recognition restarted") + } catch { + let msg = error.localizedDescription + GatewayDiagnostics.log("talk speech: restart failed error=\(msg)") + } + } + private func stopRecognition() { self.recognitionTask?.cancel() self.recognitionTask = nil self.recognitionRequest?.endAudio() self.recognitionRequest = nil - self.audioEngine.inputNode.removeTap(onBus: 0) + self.micLevel = 0 + self.lastAudioActivity = nil + self.noiseFloorSamples.removeAll(keepingCapacity: true) + self.noiseFloor = nil + self.noiseFloorReady = false + self.audioTapDiagnostics = nil + if self.inputTapInstalled { + self.audioEngine.inputNode.removeTap(onBus: 0) + self.inputTapInstalled = false + } self.audioEngine.stop() self.speechRecognizer = nil } - private nonisolated static func makeAudioTapAppendCallback(request: SpeechRequest) -> AVAudioNodeTapBlock { + private nonisolated static func makeAudioTapAppendCallback( + request: SpeechRequest, + diagnostics: AudioTapDiagnostics) -> AVAudioNodeTapBlock + { { buffer, _ in request.append(buffer) + diagnostics.onBuffer(buffer) } } private func handleTranscript(transcript: String, isFinal: Bool) async { let trimmed = transcript.trimmingCharacters(in: .whitespacesAndNewlines) - if self.isSpeaking, self.interruptOnSpeech { + let ttsActive = self.isSpeechOutputActive + if ttsActive, self.interruptOnSpeech { if self.shouldInterrupt(with: trimmed) { self.stopSpeaking() } @@ -212,6 +642,16 @@ final class TalkModeManager: NSObject { } if isFinal { self.lastTranscript = trimmed + guard !trimmed.isEmpty else { return } + GatewayDiagnostics.log("talk speech: final transcript chars=\(trimmed.count)") + 
self.loggedPartialThisCycle = false + if self.captureMode == .pushToTalk, self.pttAutoStopEnabled, self.isPushToTalkActive { + _ = await self.endPushToTalk() + return + } + if self.captureMode == .continuous, !self.isSpeechOutputActive { + await self.processTranscript(trimmed, restartAfter: true) + } } } @@ -219,7 +659,7 @@ final class TalkModeManager: NSObject { self.silenceTask?.cancel() self.silenceTask = Task { [weak self] in guard let self else { return } - while self.isEnabled { + while self.isEnabled || (self.isPushToTalkActive && self.pttAutoStopEnabled) { try? await Task.sleep(nanoseconds: 200_000_000) await self.checkSilence() } @@ -227,27 +667,67 @@ final class TalkModeManager: NSObject { } private func checkSilence() async { - guard self.isListening, !self.isSpeaking else { return } + if self.captureMode == .continuous { + guard self.isListening, !self.isSpeechOutputActive else { return } + let transcript = self.lastTranscript.trimmingCharacters(in: .whitespacesAndNewlines) + guard !transcript.isEmpty else { return } + let lastActivity = [self.lastHeard, self.lastAudioActivity].compactMap { $0 }.max() + guard let lastActivity else { return } + if Date().timeIntervalSince(lastActivity) < self.silenceWindow { return } + await self.processTranscript(transcript, restartAfter: true) + return + } + + guard self.captureMode == .pushToTalk, self.pttAutoStopEnabled else { return } + guard self.isListening, !self.isSpeaking, self.isPushToTalkActive else { return } let transcript = self.lastTranscript.trimmingCharacters(in: .whitespacesAndNewlines) guard !transcript.isEmpty else { return } - guard let lastHeard else { return } - if Date().timeIntervalSince(lastHeard) < self.silenceWindow { return } - await self.finalizeTranscript(transcript) + let lastActivity = [self.lastHeard, self.lastAudioActivity].compactMap { $0 }.max() + guard let lastActivity else { return } + if Date().timeIntervalSince(lastActivity) < self.silenceWindow { return } + _ = await 
self.endPushToTalk() } - private func finalizeTranscript(_ transcript: String) async { + // Guardrail for PTT once so we don't stay open indefinitely. + private func schedulePTTTimeout(seconds: TimeInterval) { + guard seconds > 0 else { return } + let nanos = UInt64(seconds * 1_000_000_000) + self.pttTimeoutTask?.cancel() + self.pttTimeoutTask = Task { [weak self] in + try? await Task.sleep(nanoseconds: nanos) + await self?.handlePTTTimeout() + } + } + + private func handlePTTTimeout() async { + guard self.pttAutoStopEnabled, self.isPushToTalkActive else { return } + _ = await self.endPushToTalk() + } + + private func finishPTTOnce(_ payload: OpenClawTalkPTTStopPayload) { + guard let continuation = self.pttCompletion else { return } + self.pttCompletion = nil + continuation.resume(returning: payload) + } + + private func processTranscript(_ transcript: String, restartAfter: Bool) async { self.isListening = false + self.captureMode = .idle self.statusText = "Thinking…" self.lastTranscript = "" self.lastHeard = nil self.stopRecognition() + GatewayDiagnostics.log("talk: process transcript chars=\(transcript.count) restartAfter=\(restartAfter)") await self.reloadConfig() let prompt = self.buildPrompt(transcript: transcript) - guard let gateway else { + guard self.gatewayConnected, let gateway else { self.statusText = "Gateway not connected" self.logger.warning("finalize: gateway not connected") - await self.start() + GatewayDiagnostics.log("talk: abort gateway not connected") + if restartAfter { + await self.start() + } return } @@ -257,42 +737,78 @@ final class TalkModeManager: NSObject { await self.subscribeChatIfNeeded(sessionKey: sessionKey) self.logger.info( "chat.send start sessionKey=\(sessionKey, privacy: .public) chars=\(prompt.count, privacy: .public)") + GatewayDiagnostics.log("talk: chat.send start sessionKey=\(sessionKey) chars=\(prompt.count)") let runId = try await self.sendChat(prompt, gateway: gateway) self.logger.info("chat.send ok runId=\(runId, 
privacy: .public)") + GatewayDiagnostics.log("talk: chat.send ok runId=\(runId)") + let shouldIncremental = self.shouldUseIncrementalTTS() + var streamingTask: Task? + if shouldIncremental { + self.resetIncrementalSpeech() + streamingTask = Task { @MainActor [weak self] in + guard let self else { return } + await self.streamAssistant(runId: runId, gateway: gateway) + } + } let completion = await self.waitForChatCompletion(runId: runId, gateway: gateway, timeoutSeconds: 120) if completion == .timeout { self.logger.warning( "chat completion timeout runId=\(runId, privacy: .public); attempting history fallback") + GatewayDiagnostics.log("talk: chat completion timeout runId=\(runId)") } else if completion == .aborted { self.statusText = "Aborted" self.logger.warning("chat completion aborted runId=\(runId, privacy: .public)") + GatewayDiagnostics.log("talk: chat completion aborted runId=\(runId)") + streamingTask?.cancel() + await self.finishIncrementalSpeech() await self.start() return } else if completion == .error { self.statusText = "Chat error" self.logger.warning("chat completion error runId=\(runId, privacy: .public)") + GatewayDiagnostics.log("talk: chat completion error runId=\(runId)") + streamingTask?.cancel() + await self.finishIncrementalSpeech() await self.start() return } - guard let assistantText = try await self.waitForAssistantText( + var assistantText = try await self.waitForAssistantText( gateway: gateway, since: startedAt, timeoutSeconds: completion == .final ? 
12 : 25) - else { + if assistantText == nil, shouldIncremental { + let fallback = self.incrementalSpeechBuffer.latestText + if !fallback.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + assistantText = fallback + } + } + guard let assistantText else { self.statusText = "No reply" self.logger.warning("assistant text timeout runId=\(runId, privacy: .public)") + GatewayDiagnostics.log("talk: assistant text timeout runId=\(runId)") + streamingTask?.cancel() + await self.finishIncrementalSpeech() await self.start() return } self.logger.info("assistant text ok chars=\(assistantText.count, privacy: .public)") - await self.playAssistant(text: assistantText) + GatewayDiagnostics.log("talk: assistant text ok chars=\(assistantText.count)") + streamingTask?.cancel() + if shouldIncremental { + await self.handleIncrementalAssistantFinal(text: assistantText) + } else { + await self.playAssistant(text: assistantText) + } } catch { self.statusText = "Talk failed: \(error.localizedDescription)" self.logger.error("finalize failed: \(error.localizedDescription, privacy: .public)") + GatewayDiagnostics.log("talk: failed error=\(error.localizedDescription)") } - await self.start() + if restartAfter { + await self.start() + } } private func subscribeChatIfNeeded(sessionKey: String) async { @@ -438,24 +954,7 @@ final class TalkModeManager: NSObject { let directive = parsed.directive let cleaned = parsed.stripped.trimmingCharacters(in: .whitespacesAndNewlines) guard !cleaned.isEmpty else { return } - - let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines) - let resolvedVoice = self.resolveVoiceAlias(requestedVoice) - if requestedVoice?.isEmpty == false, resolvedVoice == nil { - self.logger.warning("unknown voice alias \(requestedVoice ?? 
"?", privacy: .public)") - } - if let voice = resolvedVoice { - if directive?.once != true { - self.currentVoiceId = voice - self.voiceOverrideActive = true - } - } - if let model = directive?.modelId { - if directive?.once != true { - self.currentModelId = model - self.modelOverrideActive = true - } - } + self.applyDirective(directive) self.statusText = "Generating voice…" self.isSpeaking = true @@ -464,6 +963,11 @@ final class TalkModeManager: NSObject { do { let started = Date() let language = ElevenLabsTTSClient.validatedLanguage(directive?.language) + let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedVoice = self.resolveVoiceAlias(requestedVoice) + if requestedVoice?.isEmpty == false, resolvedVoice == nil { + self.logger.warning("unknown voice alias \(requestedVoice ?? "?", privacy: .public)") + } let resolvedKey = (self.apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? self.apiKey : nil) ?? @@ -478,6 +982,7 @@ final class TalkModeManager: NSObject { let canUseElevenLabs = (voiceId?.isEmpty == false) && (apiKey?.isEmpty == false) if canUseElevenLabs, let voiceId, let apiKey { + GatewayDiagnostics.log("talk tts: provider=elevenlabs voiceId=\(voiceId)") let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)? .trimmingCharacters(in: .whitespacesAndNewlines) let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil @@ -488,6 +993,9 @@ final class TalkModeManager: NSObject { } let modelId = directive?.modelId ?? self.currentModelId ?? self.defaultModelId + if let modelId { + GatewayDiagnostics.log("talk tts: modelId=\(modelId)") + } func makeRequest(outputFormat: String?) 
-> ElevenLabsTTSRequest { ElevenLabsTTSRequest( text: cleaned, @@ -545,6 +1053,7 @@ final class TalkModeManager: NSObject { } } else { self.logger.warning("tts unavailable; falling back to system voice (missing key or voiceId)") + GatewayDiagnostics.log("talk tts: provider=system (missing key or voiceId)") if self.interruptOnSpeech { do { try self.startRecognition() @@ -559,6 +1068,7 @@ final class TalkModeManager: NSObject { } catch { self.logger.error( "tts failed: \(error.localizedDescription, privacy: .public); falling back to system voice") + GatewayDiagnostics.log("talk tts: provider=system (error) msg=\(error.localizedDescription)") do { if self.interruptOnSpeech { do { @@ -582,17 +1092,24 @@ final class TalkModeManager: NSObject { } private func stopSpeaking(storeInterruption: Bool = true) { - guard self.isSpeaking else { return } - let interruptedAt = self.lastPlaybackWasPCM - ? self.pcmPlayer.stop() - : self.mp3Player.stop() - if storeInterruption { - self.lastInterruptedAtSeconds = interruptedAt + let hasIncremental = self.incrementalSpeechActive || + self.incrementalSpeechTask != nil || + !self.incrementalSpeechQueue.isEmpty + if self.isSpeaking { + let interruptedAt = self.lastPlaybackWasPCM + ? self.pcmPlayer.stop() + : self.mp3Player.stop() + if storeInterruption { + self.lastInterruptedAtSeconds = interruptedAt + } + _ = self.lastPlaybackWasPCM + ? self.mp3Player.stop() + : self.pcmPlayer.stop() + } else if !hasIncremental { + return } - _ = self.lastPlaybackWasPCM - ? self.mp3Player.stop() - : self.pcmPlayer.stop() TalkSystemSpeechSynthesizer.shared.stop() + self.cancelIncrementalSpeech() self.isSpeaking = false } @@ -605,7 +1122,501 @@ final class TalkModeManager: NSObject { return true } - private func resolveVoiceAlias(_ value: String?) -> String? 
{ + private func shouldUseIncrementalTTS() -> Bool { + true + } + + private var isSpeechOutputActive: Bool { + self.isSpeaking || + self.incrementalSpeechActive || + self.incrementalSpeechTask != nil || + !self.incrementalSpeechQueue.isEmpty + } + + private func applyDirective(_ directive: TalkDirective?) { + let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedVoice = self.resolveVoiceAlias(requestedVoice) + if requestedVoice?.isEmpty == false, resolvedVoice == nil { + self.logger.warning("unknown voice alias \(requestedVoice ?? "?", privacy: .public)") + } + if let voice = resolvedVoice { + if directive?.once != true { + self.currentVoiceId = voice + self.voiceOverrideActive = true + } + } + if let model = directive?.modelId { + if directive?.once != true { + self.currentModelId = model + self.modelOverrideActive = true + } + } + } + + private func resetIncrementalSpeech() { + self.incrementalSpeechQueue.removeAll() + self.incrementalSpeechTask?.cancel() + self.incrementalSpeechTask = nil + self.incrementalSpeechActive = true + self.incrementalSpeechUsed = false + self.incrementalSpeechLanguage = nil + self.incrementalSpeechBuffer = IncrementalSpeechBuffer() + self.incrementalSpeechContext = nil + self.incrementalSpeechDirective = nil + } + + private func cancelIncrementalSpeech() { + self.incrementalSpeechQueue.removeAll() + self.incrementalSpeechTask?.cancel() + self.incrementalSpeechTask = nil + self.incrementalSpeechActive = false + self.incrementalSpeechContext = nil + self.incrementalSpeechDirective = nil + } + + private func enqueueIncrementalSpeech(_ text: String) { + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + self.incrementalSpeechQueue.append(trimmed) + self.incrementalSpeechUsed = true + if self.incrementalSpeechTask == nil { + self.startIncrementalSpeechTask() + } + } + + private func startIncrementalSpeechTask() { + if 
self.interruptOnSpeech { + do { + try self.startRecognition() + } catch { + self.logger.warning( + "startRecognition during incremental speak failed: \(error.localizedDescription, privacy: .public)") + } + } + + self.incrementalSpeechTask = Task { @MainActor [weak self] in + guard let self else { return } + while !Task.isCancelled { + guard !self.incrementalSpeechQueue.isEmpty else { break } + let segment = self.incrementalSpeechQueue.removeFirst() + self.statusText = "Speaking…" + self.isSpeaking = true + self.lastSpokenText = segment + await self.speakIncrementalSegment(segment) + } + self.isSpeaking = false + self.stopRecognition() + self.incrementalSpeechTask = nil + } + } + + private func finishIncrementalSpeech() async { + guard self.incrementalSpeechActive else { return } + let leftover = self.incrementalSpeechBuffer.flush() + if let leftover { + self.enqueueIncrementalSpeech(leftover) + } + if let task = self.incrementalSpeechTask { + _ = await task.result + } + self.incrementalSpeechActive = false + } + + private func handleIncrementalAssistantFinal(text: String) async { + let parsed = TalkDirectiveParser.parse(text) + self.applyDirective(parsed.directive) + if let lang = parsed.directive?.language { + self.incrementalSpeechLanguage = ElevenLabsTTSClient.validatedLanguage(lang) + } + await self.updateIncrementalContextIfNeeded() + let segments = self.incrementalSpeechBuffer.ingest(text: text, isFinal: true) + for segment in segments { + self.enqueueIncrementalSpeech(segment) + } + await self.finishIncrementalSpeech() + if !self.incrementalSpeechUsed { + await self.playAssistant(text: text) + } + } + + private func streamAssistant(runId: String, gateway: GatewayNodeSession) async { + let stream = await gateway.subscribeServerEvents(bufferingNewest: 200) + for await evt in stream { + if Task.isCancelled { return } + guard evt.event == "agent", let payload = evt.payload else { continue } + guard let agentEvent = try? 
GatewayPayloadDecoding.decode(payload, as: OpenClawAgentEventPayload.self) else { + continue + } + guard agentEvent.runId == runId, agentEvent.stream == "assistant" else { continue } + guard let text = agentEvent.data["text"]?.value as? String else { continue } + let segments = self.incrementalSpeechBuffer.ingest(text: text, isFinal: false) + if let lang = self.incrementalSpeechBuffer.directive?.language { + self.incrementalSpeechLanguage = ElevenLabsTTSClient.validatedLanguage(lang) + } + await self.updateIncrementalContextIfNeeded() + for segment in segments { + self.enqueueIncrementalSpeech(segment) + } + } + } + + private func updateIncrementalContextIfNeeded() async { + let directive = self.incrementalSpeechBuffer.directive + if let existing = self.incrementalSpeechContext, directive == self.incrementalSpeechDirective { + if existing.language != self.incrementalSpeechLanguage { + self.incrementalSpeechContext = IncrementalSpeechContext( + apiKey: existing.apiKey, + voiceId: existing.voiceId, + modelId: existing.modelId, + outputFormat: existing.outputFormat, + language: self.incrementalSpeechLanguage, + directive: existing.directive, + canUseElevenLabs: existing.canUseElevenLabs) + } + return + } + let context = await self.buildIncrementalSpeechContext(directive: directive) + self.incrementalSpeechContext = context + self.incrementalSpeechDirective = directive + } + + private func buildIncrementalSpeechContext(directive: TalkDirective?) async -> IncrementalSpeechContext { + let requestedVoice = directive?.voiceId?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedVoice = self.resolveVoiceAlias(requestedVoice) + if requestedVoice?.isEmpty == false, resolvedVoice == nil { + self.logger.warning("unknown voice alias \(requestedVoice ?? "?", privacy: .public)") + } + let preferredVoice = resolvedVoice ?? self.currentVoiceId ?? self.defaultVoiceId + let modelId = directive?.modelId ?? self.currentModelId ?? 
self.defaultModelId + let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)? + .trimmingCharacters(in: .whitespacesAndNewlines) + let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil + let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(requestedOutputFormat ?? "pcm_44100") + if outputFormat == nil, let requestedOutputFormat { + self.logger.warning( + "talk output_format unsupported for local playback: \(requestedOutputFormat, privacy: .public)") + } + + let resolvedKey = + (self.apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? self.apiKey : nil) ?? + ProcessInfo.processInfo.environment["ELEVENLABS_API_KEY"] + let apiKey = resolvedKey?.trimmingCharacters(in: .whitespacesAndNewlines) + let voiceId: String? = if let apiKey, !apiKey.isEmpty { + await self.resolveVoiceId(preferred: preferredVoice, apiKey: apiKey) + } else { + nil + } + let canUseElevenLabs = (voiceId?.isEmpty == false) && (apiKey?.isEmpty == false) + return IncrementalSpeechContext( + apiKey: apiKey, + voiceId: voiceId, + modelId: modelId, + outputFormat: outputFormat, + language: self.incrementalSpeechLanguage, + directive: directive, + canUseElevenLabs: canUseElevenLabs) + } + + private func speakIncrementalSegment(_ text: String) async { + await self.updateIncrementalContextIfNeeded() + guard let context = self.incrementalSpeechContext else { + try? 
await TalkSystemSpeechSynthesizer.shared.speak( + text: text, + language: self.incrementalSpeechLanguage) + return + } + + if context.canUseElevenLabs, let apiKey = context.apiKey, let voiceId = context.voiceId { + let request = ElevenLabsTTSRequest( + text: text, + modelId: context.modelId, + outputFormat: context.outputFormat, + speed: TalkTTSValidation.resolveSpeed( + speed: context.directive?.speed, + rateWPM: context.directive?.rateWPM), + stability: TalkTTSValidation.validatedStability( + context.directive?.stability, + modelId: context.modelId), + similarity: TalkTTSValidation.validatedUnit(context.directive?.similarity), + style: TalkTTSValidation.validatedUnit(context.directive?.style), + speakerBoost: context.directive?.speakerBoost, + seed: TalkTTSValidation.validatedSeed(context.directive?.seed), + normalize: ElevenLabsTTSClient.validatedNormalize(context.directive?.normalize), + language: context.language, + latencyTier: TalkTTSValidation.validatedLatencyTier(context.directive?.latencyTier)) + let client = ElevenLabsTTSClient(apiKey: apiKey) + let stream = client.streamSynthesize(voiceId: voiceId, request: request) + let sampleRate = TalkTTSValidation.pcmSampleRate(from: context.outputFormat) + let result: StreamingPlaybackResult + if let sampleRate { + self.lastPlaybackWasPCM = true + var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate) + if !playback.finished, playback.interruptedAt == nil { + self.logger.warning("pcm playback failed; retrying mp3") + self.lastPlaybackWasPCM = false + let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100") + let mp3Stream = client.streamSynthesize( + voiceId: voiceId, + request: ElevenLabsTTSRequest( + text: text, + modelId: context.modelId, + outputFormat: mp3Format, + speed: TalkTTSValidation.resolveSpeed( + speed: context.directive?.speed, + rateWPM: context.directive?.rateWPM), + stability: TalkTTSValidation.validatedStability( + context.directive?.stability, + modelId: 
context.modelId), + similarity: TalkTTSValidation.validatedUnit(context.directive?.similarity), + style: TalkTTSValidation.validatedUnit(context.directive?.style), + speakerBoost: context.directive?.speakerBoost, + seed: TalkTTSValidation.validatedSeed(context.directive?.seed), + normalize: ElevenLabsTTSClient.validatedNormalize(context.directive?.normalize), + language: context.language, + latencyTier: TalkTTSValidation.validatedLatencyTier(context.directive?.latencyTier))) + playback = await self.mp3Player.play(stream: mp3Stream) + } + result = playback + } else { + self.lastPlaybackWasPCM = false + result = await self.mp3Player.play(stream: stream) + } + if !result.finished, let interruptedAt = result.interruptedAt { + self.lastInterruptedAtSeconds = interruptedAt + } + } else { + try? await TalkSystemSpeechSynthesizer.shared.speak( + text: text, + language: self.incrementalSpeechLanguage) + } + } + +} + +private struct IncrementalSpeechBuffer { + private(set) var latestText: String = "" + private(set) var directive: TalkDirective? + private var spokenOffset: Int = 0 + private var inCodeBlock = false + private var directiveParsed = false + + mutating func ingest(text: String, isFinal: Bool) -> [String] { + let normalized = text.replacingOccurrences(of: "\r\n", with: "\n") + guard let usable = self.stripDirectiveIfReady(from: normalized) else { return [] } + self.updateText(usable) + return self.extractSegments(isFinal: isFinal) + } + + mutating func flush() -> String? { + guard !self.latestText.isEmpty else { return nil } + let segments = self.extractSegments(isFinal: true) + return segments.first + } + + private mutating func stripDirectiveIfReady(from text: String) -> String? 
{ + guard !self.directiveParsed else { return text } + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + if trimmed.hasPrefix("{") { + guard let newlineRange = text.range(of: "\n") else { return nil } + let firstLine = text[.. commonPrefix { + self.spokenOffset = commonPrefix + } + } + if self.spokenOffset > self.latestText.count { + self.spokenOffset = self.latestText.count + } + } + + private static func commonPrefixCount(_ lhs: String, _ rhs: String) -> Int { + let left = Array(lhs) + let right = Array(rhs) + let limit = min(left.count, right.count) + var idx = 0 + while idx < limit, left[idx] == right[idx] { + idx += 1 + } + return idx + } + + private mutating func extractSegments(isFinal: Bool) -> [String] { + let chars = Array(self.latestText) + guard self.spokenOffset < chars.count else { return [] } + var idx = self.spokenOffset + var lastBoundary: Int? + var inCodeBlock = self.inCodeBlock + var buffer = "" + var bufferAtBoundary = "" + var inCodeBlockAtBoundary = inCodeBlock + + while idx < chars.count { + if idx + 2 < chars.count, + chars[idx] == "`", + chars[idx + 1] == "`", + chars[idx + 2] == "`" + { + inCodeBlock.toggle() + idx += 3 + continue + } + + if !inCodeBlock { + buffer.append(chars[idx]) + if Self.isBoundary(chars[idx]) { + lastBoundary = idx + 1 + bufferAtBoundary = buffer + inCodeBlockAtBoundary = inCodeBlock + } + } + + idx += 1 + } + + if let boundary = lastBoundary { + self.spokenOffset = boundary + self.inCodeBlock = inCodeBlockAtBoundary + let trimmed = bufferAtBoundary.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? [] : [trimmed] + } + + guard isFinal else { return [] } + self.spokenOffset = chars.count + self.inCodeBlock = inCodeBlock + let trimmed = buffer.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? [] : [trimmed] + } + + private static func isBoundary(_ ch: Character) -> Bool { + ch == "." || ch == "!" 
|| ch == "?" || ch == "\n" + } +} + +extension TalkModeManager { + nonisolated static func requestMicrophonePermission() async -> Bool { + let session = AVAudioSession.sharedInstance() + switch session.recordPermission { + case .granted: + return true + case .denied: + return false + case .undetermined: + break + @unknown default: + return false + } + + return await self.requestPermissionWithTimeout { completion in + AVAudioSession.sharedInstance().requestRecordPermission { ok in + completion(ok) + } + } + } + + nonisolated static func requestSpeechPermission() async -> Bool { + let status = SFSpeechRecognizer.authorizationStatus() + switch status { + case .authorized: + return true + case .denied, .restricted: + return false + case .notDetermined: + break + @unknown default: + return false + } + + return await self.requestPermissionWithTimeout { completion in + SFSpeechRecognizer.requestAuthorization { authStatus in + completion(authStatus == .authorized) + } + } + } + + private nonisolated static func requestPermissionWithTimeout( + _ operation: @escaping @Sendable (@escaping (Bool) -> Void) -> Void) async -> Bool + { + do { + return try await AsyncTimeout.withTimeout( + seconds: 8, + onTimeout: { NSError(domain: "TalkMode", code: 6, userInfo: [ + NSLocalizedDescriptionKey: "permission request timed out", + ]) }, + operation: { + await withCheckedContinuation(isolation: nil) { cont in + Task { @MainActor in + operation { ok in + cont.resume(returning: ok) + } + } + } + }) + } catch { + return false + } + } + + static func permissionMessage( + kind: String, + status: AVAudioSession.RecordPermission) -> String + { + switch status { + case .denied: + return "\(kind) permission denied" + case .undetermined: + return "\(kind) permission not granted" + case .granted: + return "\(kind) permission denied" + @unknown default: + return "\(kind) permission denied" + } + } + + static func permissionMessage( + kind: String, + status: SFSpeechRecognizerAuthorizationStatus) -> 
String + { + switch status { + case .denied: + return "\(kind) permission denied" + case .restricted: + return "\(kind) permission restricted" + case .notDetermined: + return "\(kind) permission not granted" + case .authorized: + return "\(kind) permission denied" + @unknown default: + return "\(kind) permission denied" + } + } +} + +extension TalkModeManager { + func resolveVoiceAlias(_ value: String?) -> String? { let trimmed = (value ?? "").trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { return nil } let normalized = trimmed.lowercased() @@ -616,9 +1627,14 @@ final class TalkModeManager: NSObject { return Self.isLikelyVoiceId(trimmed) ? trimmed : nil } - private func resolveVoiceId(preferred: String?, apiKey: String) async -> String? { + func resolveVoiceId(preferred: String?, apiKey: String) async -> String? { let trimmed = preferred?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" if !trimmed.isEmpty { + // Config / directives can provide a raw ElevenLabs voiceId (not an alias). + // Accept it directly to avoid unnecessary listVoices calls (and accidental fallback selection). + if Self.isLikelyVoiceId(trimmed) { + return trimmed + } if let resolved = self.resolveVoiceAlias(trimmed) { return resolved } self.logger.warning("unknown voice alias \(trimmed, privacy: .public)") } @@ -647,23 +1663,18 @@ final class TalkModeManager: NSObject { } } - private static func isLikelyVoiceId(_ value: String) -> Bool { + static func isLikelyVoiceId(_ value: String) -> Bool { guard value.count >= 10 else { return false } return value.allSatisfy { $0.isLetter || $0.isNumber || $0 == "-" || $0 == "_" } } - private func reloadConfig() async { + func reloadConfig() async { guard let gateway else { return } do { let res = try await gateway.request(method: "config.get", paramsJSON: "{}", timeoutSeconds: 8) guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return } guard let config = json["config"] as? 
[String: Any] else { return } let talk = config["talk"] as? [String: Any] - let session = config["session"] as? [String: Any] - let mainKey = SessionKey.normalizeMainKey(session?["mainKey"] as? String) - if !SessionKey.isCanonicalMainSessionKey(self.mainSessionKey) { - self.mainSessionKey = mainKey - } self.defaultVoiceId = (talk?["voiceId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) if let aliases = talk?["voiceAliases"] as? [String: Any] { var resolved: [String: String] = [:] @@ -700,30 +1711,132 @@ final class TalkModeManager: NSObject { } } - private static func configureAudioSession() throws { + static func configureAudioSession() throws { let session = AVAudioSession.sharedInstance() - try session.setCategory(.playAndRecord, mode: .voiceChat, options: [ - .duckOthers, - .mixWithOthers, + // Prefer `.spokenAudio` for STT; it tends to preserve speech energy better than `.voiceChat`. + try session.setCategory(.playAndRecord, mode: .spokenAudio, options: [ .allowBluetoothHFP, .defaultToSpeaker, ]) + try? session.setPreferredSampleRate(48_000) + try? 
session.setPreferredIOBufferDuration(0.02) try session.setActive(true, options: []) } - private nonisolated static func requestMicrophonePermission() async -> Bool { - await withCheckedContinuation(isolation: nil) { cont in - AVAudioApplication.requestRecordPermission { ok in - cont.resume(returning: ok) - } - } - } - - private nonisolated static func requestSpeechPermission() async -> Bool { - await withCheckedContinuation(isolation: nil) { cont in - SFSpeechRecognizer.requestAuthorization { status in - cont.resume(returning: status == .authorized) - } - } + private static func describeAudioSession() -> String { + let session = AVAudioSession.sharedInstance() + let inputs = session.currentRoute.inputs.map { "\($0.portType.rawValue):\($0.portName)" }.joined(separator: ",") + let outputs = session.currentRoute.outputs.map { "\($0.portType.rawValue):\($0.portName)" }.joined(separator: ",") + let available = session.availableInputs?.map { "\($0.portType.rawValue):\($0.portName)" }.joined(separator: ",") ?? "" + return "category=\(session.category.rawValue) mode=\(session.mode.rawValue) opts=\(session.categoryOptions.rawValue) inputAvail=\(session.isInputAvailable) routeIn=[\(inputs)] routeOut=[\(outputs)] availIn=[\(available)]" } } + +private final class AudioTapDiagnostics: @unchecked Sendable { + private let label: String + private let onLevel: (@Sendable (Float) -> Void)? + private let lock = NSLock() + private var bufferCount: Int = 0 + private var lastLoggedAt = Date.distantPast + private var lastLevelEmitAt = Date.distantPast + private var maxRmsWindow: Float = 0 + private var lastRms: Float = 0 + + init(label: String, onLevel: (@Sendable (Float) -> Void)? 
= nil) { + self.label = label + self.onLevel = onLevel + } + + func onBuffer(_ buffer: AVAudioPCMBuffer) { + var shouldLog = false + var shouldEmitLevel = false + var count = 0 + lock.lock() + bufferCount += 1 + count = bufferCount + let now = Date() + if now.timeIntervalSince(lastLoggedAt) >= 1.0 { + lastLoggedAt = now + shouldLog = true + } + if now.timeIntervalSince(lastLevelEmitAt) >= 0.12 { + lastLevelEmitAt = now + shouldEmitLevel = true + } + lock.unlock() + + let rate = buffer.format.sampleRate + let ch = buffer.format.channelCount + let frames = buffer.frameLength + + var rms: Float? + if let data = buffer.floatChannelData?.pointee { + let n = Int(frames) + if n > 0 { + var sum: Float = 0 + for i in 0.. maxRmsWindow { maxRmsWindow = resolvedRms } + let maxRms = maxRmsWindow + if shouldLog { maxRmsWindow = 0 } + lock.unlock() + + if shouldEmitLevel, let onLevel { + onLevel(resolvedRms) + } + + guard shouldLog else { return } + GatewayDiagnostics.log( + "\(label) mic: buffers=\(count) frames=\(frames) rate=\(Int(rate))Hz ch=\(ch) rms=\(String(format: "%.4f", resolvedRms)) max=\(String(format: "%.4f", maxRms))") + } +} + +#if DEBUG +extension TalkModeManager { + func _test_seedTranscript(_ transcript: String) { + self.lastTranscript = transcript + self.lastHeard = Date() + } + + func _test_handleTranscript(_ transcript: String, isFinal: Bool) async { + await self.handleTranscript(transcript: transcript, isFinal: isFinal) + } + + func _test_backdateLastHeard(seconds: TimeInterval) { + self.lastHeard = Date().addingTimeInterval(-seconds) + } + + func _test_runSilenceCheck() async { + await self.checkSilence() + } + + func _test_incrementalReset() { + self.incrementalSpeechBuffer = IncrementalSpeechBuffer() + } + + func _test_incrementalIngest(_ text: String, isFinal: Bool) -> [String] { + self.incrementalSpeechBuffer.ingest(text: text, isFinal: isFinal) + } +} +#endif + +private struct IncrementalSpeechContext { + let apiKey: String? + let voiceId: String? 
+ let modelId: String? + let outputFormat: String? + let language: String? + let directive: TalkDirective? + let canUseElevenLabs: Bool +} + +// swiftlint:enable type_body_length diff --git a/apps/ios/Sources/Voice/TalkOrbOverlay.swift b/apps/ios/Sources/Voice/TalkOrbOverlay.swift index cce8c1c611..f24cab5aed 100644 --- a/apps/ios/Sources/Voice/TalkOrbOverlay.swift +++ b/apps/ios/Sources/Voice/TalkOrbOverlay.swift @@ -7,6 +7,7 @@ struct TalkOrbOverlay: View { var body: some View { let seam = self.appModel.seamColor let status = self.appModel.talkMode.statusText.trimmingCharacters(in: .whitespacesAndNewlines) + let mic = min(max(self.appModel.talkMode.micLevel, 0), 1) VStack(spacing: 14) { ZStack { @@ -28,7 +29,7 @@ struct TalkOrbOverlay: View { .fill( RadialGradient( colors: [ - seam.opacity(0.95), + seam.opacity(0.75 + (0.20 * mic)), seam.opacity(0.40), Color.black.opacity(0.55), ], @@ -36,6 +37,7 @@ struct TalkOrbOverlay: View { startRadius: 1, endRadius: 112)) .frame(width: 190, height: 190) + .scaleEffect(1.0 + (0.12 * mic)) .overlay( Circle() .stroke(seam.opacity(0.35), lineWidth: 1)) @@ -47,6 +49,13 @@ struct TalkOrbOverlay: View { self.appModel.talkMode.userTappedOrb() } + let agentName = self.appModel.activeAgentName.trimmingCharacters(in: .whitespacesAndNewlines) + if !agentName.isEmpty { + Text("Bot: \(agentName)") + .font(.system(.caption, design: .rounded).weight(.semibold)) + .foregroundStyle(Color.white.opacity(0.70)) + } + if !status.isEmpty, status != "Off" { Text(status) .font(.system(.footnote, design: .rounded).weight(.semibold)) @@ -59,6 +68,14 @@ struct TalkOrbOverlay: View { .overlay( Capsule().stroke(seam.opacity(0.22), lineWidth: 1))) } + + if self.appModel.talkMode.isListening { + Capsule() + .fill(seam.opacity(0.90)) + .frame(width: max(18, 180 * mic), height: 6) + .animation(.easeOut(duration: 0.12), value: mic) + .accessibilityLabel("Microphone level") + } } .padding(28) .onAppear { diff --git 
a/apps/ios/Sources/Voice/VoiceWakeManager.swift b/apps/ios/Sources/Voice/VoiceWakeManager.swift index 771b5a77a6..15a993feaa 100644 --- a/apps/ios/Sources/Voice/VoiceWakeManager.swift +++ b/apps/ios/Sources/Voice/VoiceWakeManager.swift @@ -1,6 +1,7 @@ import AVFAudio import Foundation import Observation +import OpenClawKit import Speech import SwabbleKit @@ -96,6 +97,7 @@ final class VoiceWakeManager: NSObject { private var lastDispatched: String? private var onCommand: (@Sendable (String) async -> Void)? private var userDefaultsObserver: NSObjectProtocol? + private var suppressedByTalk: Bool = false override init() { super.init() @@ -141,9 +143,28 @@ final class VoiceWakeManager: NSObject { } } + func setSuppressedByTalk(_ suppressed: Bool) { + self.suppressedByTalk = suppressed + if suppressed { + _ = self.suspendForExternalAudioCapture() + if self.isEnabled { + self.statusText = "Paused" + } + } else { + if self.isEnabled { + Task { await self.start() } + } + } + } + func start() async { guard self.isEnabled else { return } if self.isListening { return } + guard !self.suppressedByTalk else { + self.isListening = false + self.statusText = "Paused" + return + } if ProcessInfo.processInfo.environment["SIMULATOR_DEVICE_NAME"] != nil || ProcessInfo.processInfo.environment["SIMULATOR_UDID"] != nil @@ -159,14 +180,18 @@ final class VoiceWakeManager: NSObject { let micOk = await Self.requestMicrophonePermission() guard micOk else { - self.statusText = "Microphone permission denied" + self.statusText = Self.permissionMessage( + kind: "Microphone", + status: AVAudioSession.sharedInstance().recordPermission) self.isListening = false return } let speechOk = await Self.requestSpeechPermission() guard speechOk else { - self.statusText = "Speech recognition permission denied" + self.statusText = Self.permissionMessage( + kind: "Speech recognition", + status: SFSpeechRecognizer.authorizationStatus()) self.isListening = false return } @@ -364,20 +389,101 @@ final class 
VoiceWakeManager: NSObject { } private nonisolated static func requestMicrophonePermission() async -> Bool { - await withCheckedContinuation(isolation: nil) { cont in - AVAudioApplication.requestRecordPermission { ok in - cont.resume(returning: ok) + let session = AVAudioSession.sharedInstance() + switch session.recordPermission { + case .granted: + return true + case .denied: + return false + case .undetermined: + break + @unknown default: + return false + } + + return await self.requestPermissionWithTimeout { completion in + AVAudioSession.sharedInstance().requestRecordPermission { ok in + completion(ok) } } } private nonisolated static func requestSpeechPermission() async -> Bool { - await withCheckedContinuation(isolation: nil) { cont in - SFSpeechRecognizer.requestAuthorization { status in - cont.resume(returning: status == .authorized) + let status = SFSpeechRecognizer.authorizationStatus() + switch status { + case .authorized: + return true + case .denied, .restricted: + return false + case .notDetermined: + break + @unknown default: + return false + } + + return await self.requestPermissionWithTimeout { completion in + SFSpeechRecognizer.requestAuthorization { authStatus in + completion(authStatus == .authorized) } } } + + private nonisolated static func requestPermissionWithTimeout( + _ operation: @escaping @Sendable (@escaping (Bool) -> Void) -> Void) async -> Bool + { + do { + return try await AsyncTimeout.withTimeout( + seconds: 8, + onTimeout: { NSError(domain: "VoiceWake", code: 6, userInfo: [ + NSLocalizedDescriptionKey: "permission request timed out", + ]) }, + operation: { + await withCheckedContinuation(isolation: nil) { cont in + Task { @MainActor in + operation { ok in + cont.resume(returning: ok) + } + } + } + }) + } catch { + return false + } + } + + private static func permissionMessage( + kind: String, + status: AVAudioSession.RecordPermission) -> String + { + switch status { + case .denied: + return "\(kind) permission denied" + case 
.undetermined: + return "\(kind) permission not granted" + case .granted: + return "\(kind) permission denied" + @unknown default: + return "\(kind) permission denied" + } + } + + private static func permissionMessage( + kind: String, + status: SFSpeechRecognizerAuthorizationStatus) -> String + { + switch status { + case .denied: + return "\(kind) permission denied" + case .restricted: + return "\(kind) permission restricted" + case .notDetermined: + return "\(kind) permission not granted" + case .authorized: + return "\(kind) permission denied" + @unknown default: + return "\(kind) permission denied" + } + } } #if DEBUG diff --git a/apps/ios/SwiftSources.input.xcfilelist b/apps/ios/SwiftSources.input.xcfilelist index 4952019c77..5b1ba7d70e 100644 --- a/apps/ios/SwiftSources.input.xcfilelist +++ b/apps/ios/SwiftSources.input.xcfilelist @@ -9,6 +9,7 @@ Sources/Chat/IOSGatewayChatTransport.swift Sources/OpenClawApp.swift Sources/Location/LocationService.swift Sources/Model/NodeAppModel.swift +Sources/Model/NodeAppModel+Canvas.swift Sources/RootCanvas.swift Sources/RootTabs.swift Sources/Screen/ScreenController.swift diff --git a/apps/ios/Tests/GatewaySettingsStoreTests.swift b/apps/ios/Tests/GatewaySettingsStoreTests.swift index 255c7aac9b..cd9842239c 100644 --- a/apps/ios/Tests/GatewaySettingsStoreTests.swift +++ b/apps/ios/Tests/GatewaySettingsStoreTests.swift @@ -7,8 +7,8 @@ private struct KeychainEntry: Hashable { let account: String } -private let gatewayService = "bot.molt.gateway" -private let nodeService = "bot.molt.node" +private let gatewayService = "ai.openclaw.gateway" +private let nodeService = "ai.openclaw.node" private let instanceIdEntry = KeychainEntry(service: nodeService, account: "instanceId") private let preferredGatewayEntry = KeychainEntry(service: gatewayService, account: "preferredStableID") private let lastGatewayEntry = KeychainEntry(service: gatewayService, account: "lastDiscoveredStableID") diff --git 
a/apps/ios/Tests/NodeAppModelInvokeTests.swift b/apps/ios/Tests/NodeAppModelInvokeTests.swift index 124059021d..3041439399 100644 --- a/apps/ios/Tests/NodeAppModelInvokeTests.swift +++ b/apps/ios/Tests/NodeAppModelInvokeTests.swift @@ -101,7 +101,8 @@ private func withUserDefaults(_ updates: [String: Any?], _ body: () throws -> #expect(presentRes.ok == true) #expect(appModel.screen.urlString.isEmpty) - let navigateParams = OpenClawCanvasNavigateParams(url: "http://localhost:18789/") + // Loopback URLs are rejected (they are not meaningful for a remote gateway). + let navigateParams = OpenClawCanvasNavigateParams(url: "http://example.com/") let navData = try JSONEncoder().encode(navigateParams) let navJSON = String(decoding: navData, as: UTF8.self) let navigate = BridgeInvokeRequest( @@ -110,7 +111,7 @@ private func withUserDefaults(_ updates: [String: Any?], _ body: () throws -> paramsJSON: navJSON) let navRes = await appModel._test_handleInvoke(navigate) #expect(navRes.ok == true) - #expect(appModel.screen.urlString == "http://localhost:18789/") + #expect(appModel.screen.urlString == "http://example.com/") let evalParams = OpenClawCanvasEvalParams(javaScript: "1+1") let evalData = try JSONEncoder().encode(evalParams) diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/CalendarCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/CalendarCommands.swift new file mode 100644 index 0000000000..9935b81ba9 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/CalendarCommands.swift @@ -0,0 +1,93 @@ +import Foundation + +public enum OpenClawCalendarCommand: String, Codable, Sendable { + case events = "calendar.events" + case add = "calendar.add" +} + +public struct OpenClawCalendarEventsParams: Codable, Sendable, Equatable { + public var startISO: String? + public var endISO: String? + public var limit: Int? + + public init(startISO: String? = nil, endISO: String? = nil, limit: Int? 
= nil) { + self.startISO = startISO + self.endISO = endISO + self.limit = limit + } +} + +public struct OpenClawCalendarAddParams: Codable, Sendable, Equatable { + public var title: String + public var startISO: String + public var endISO: String + public var isAllDay: Bool? + public var location: String? + public var notes: String? + public var calendarId: String? + public var calendarTitle: String? + + public init( + title: String, + startISO: String, + endISO: String, + isAllDay: Bool? = nil, + location: String? = nil, + notes: String? = nil, + calendarId: String? = nil, + calendarTitle: String? = nil) + { + self.title = title + self.startISO = startISO + self.endISO = endISO + self.isAllDay = isAllDay + self.location = location + self.notes = notes + self.calendarId = calendarId + self.calendarTitle = calendarTitle + } +} + +public struct OpenClawCalendarEventPayload: Codable, Sendable, Equatable { + public var identifier: String + public var title: String + public var startISO: String + public var endISO: String + public var isAllDay: Bool + public var location: String? + public var calendarTitle: String? + + public init( + identifier: String, + title: String, + startISO: String, + endISO: String, + isAllDay: Bool, + location: String? = nil, + calendarTitle: String? 
= nil) + { + self.identifier = identifier + self.title = title + self.startISO = startISO + self.endISO = endISO + self.isAllDay = isAllDay + self.location = location + self.calendarTitle = calendarTitle + } +} + +public struct OpenClawCalendarEventsPayload: Codable, Sendable, Equatable { + public var events: [OpenClawCalendarEventPayload] + + public init(events: [OpenClawCalendarEventPayload]) { + self.events = events + } +} + +public struct OpenClawCalendarAddPayload: Codable, Sendable, Equatable { + public var event: OpenClawCalendarEventPayload + + public init(event: OpenClawCalendarEventPayload) { + self.event = event + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/Capabilities.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/Capabilities.swift index 1cb820e732..d5c5e3c439 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/Capabilities.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/Capabilities.swift @@ -6,4 +6,10 @@ public enum OpenClawCapability: String, Codable, Sendable { case screen case voiceWake case location + case device + case photos + case contacts + case calendar + case reminders + case motion } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatCommands.swift new file mode 100644 index 0000000000..98bac6205d --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatCommands.swift @@ -0,0 +1,23 @@ +import Foundation + +public enum OpenClawChatCommand: String, Codable, Sendable { + case push = "chat.push" +} + +public struct OpenClawChatPushParams: Codable, Sendable, Equatable { + public var text: String + public var speak: Bool? + + public init(text: String, speak: Bool? = nil) { + self.text = text + self.speak = speak + } +} + +public struct OpenClawChatPushPayload: Codable, Sendable, Equatable { + public var messageId: String? + + public init(messageId: String? 
= nil) { + self.messageId = messageId + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/ContactsCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/ContactsCommands.swift new file mode 100644 index 0000000000..d99f6b9e74 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/ContactsCommands.swift @@ -0,0 +1,85 @@ +import Foundation + +public enum OpenClawContactsCommand: String, Codable, Sendable { + case search = "contacts.search" + case add = "contacts.add" +} + +public struct OpenClawContactsSearchParams: Codable, Sendable, Equatable { + public var query: String? + public var limit: Int? + + public init(query: String? = nil, limit: Int? = nil) { + self.query = query + self.limit = limit + } +} + +public struct OpenClawContactsAddParams: Codable, Sendable, Equatable { + public var givenName: String? + public var familyName: String? + public var organizationName: String? + public var displayName: String? + public var phoneNumbers: [String]? + public var emails: [String]? + + public init( + givenName: String? = nil, + familyName: String? = nil, + organizationName: String? = nil, + displayName: String? = nil, + phoneNumbers: [String]? = nil, + emails: [String]? 
= nil) + { + self.givenName = givenName + self.familyName = familyName + self.organizationName = organizationName + self.displayName = displayName + self.phoneNumbers = phoneNumbers + self.emails = emails + } +} + +public struct OpenClawContactPayload: Codable, Sendable, Equatable { + public var identifier: String + public var displayName: String + public var givenName: String + public var familyName: String + public var organizationName: String + public var phoneNumbers: [String] + public var emails: [String] + + public init( + identifier: String, + displayName: String, + givenName: String, + familyName: String, + organizationName: String, + phoneNumbers: [String], + emails: [String]) + { + self.identifier = identifier + self.displayName = displayName + self.givenName = givenName + self.familyName = familyName + self.organizationName = organizationName + self.phoneNumbers = phoneNumbers + self.emails = emails + } +} + +public struct OpenClawContactsSearchPayload: Codable, Sendable, Equatable { + public var contacts: [OpenClawContactPayload] + + public init(contacts: [OpenClawContactPayload]) { + self.contacts = contacts + } +} + +public struct OpenClawContactsAddPayload: Codable, Sendable, Equatable { + public var contact: OpenClawContactPayload + + public init(contact: OpenClawContactPayload) { + self.contact = contact + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/DeviceCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/DeviceCommands.swift new file mode 100644 index 0000000000..c58224b3f1 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/DeviceCommands.swift @@ -0,0 +1,134 @@ +import Foundation + +public enum OpenClawDeviceCommand: String, Codable, Sendable { + case status = "device.status" + case info = "device.info" +} + +public enum OpenClawBatteryState: String, Codable, Sendable { + case unknown + case unplugged + case charging + case full +} + +public enum OpenClawThermalState: String, Codable, Sendable { + case 
nominal + case fair + case serious + case critical +} + +public enum OpenClawNetworkPathStatus: String, Codable, Sendable { + case satisfied + case unsatisfied + case requiresConnection +} + +public enum OpenClawNetworkInterfaceType: String, Codable, Sendable { + case wifi + case cellular + case wired + case other +} + +public struct OpenClawBatteryStatusPayload: Codable, Sendable, Equatable { + public var level: Double? + public var state: OpenClawBatteryState + public var lowPowerModeEnabled: Bool + + public init(level: Double?, state: OpenClawBatteryState, lowPowerModeEnabled: Bool) { + self.level = level + self.state = state + self.lowPowerModeEnabled = lowPowerModeEnabled + } +} + +public struct OpenClawThermalStatusPayload: Codable, Sendable, Equatable { + public var state: OpenClawThermalState + + public init(state: OpenClawThermalState) { + self.state = state + } +} + +public struct OpenClawStorageStatusPayload: Codable, Sendable, Equatable { + public var totalBytes: Int64 + public var freeBytes: Int64 + public var usedBytes: Int64 + + public init(totalBytes: Int64, freeBytes: Int64, usedBytes: Int64) { + self.totalBytes = totalBytes + self.freeBytes = freeBytes + self.usedBytes = usedBytes + } +} + +public struct OpenClawNetworkStatusPayload: Codable, Sendable, Equatable { + public var status: OpenClawNetworkPathStatus + public var isExpensive: Bool + public var isConstrained: Bool + public var interfaces: [OpenClawNetworkInterfaceType] + + public init( + status: OpenClawNetworkPathStatus, + isExpensive: Bool, + isConstrained: Bool, + interfaces: [OpenClawNetworkInterfaceType]) + { + self.status = status + self.isExpensive = isExpensive + self.isConstrained = isConstrained + self.interfaces = interfaces + } +} + +public struct OpenClawDeviceStatusPayload: Codable, Sendable, Equatable { + public var battery: OpenClawBatteryStatusPayload + public var thermal: OpenClawThermalStatusPayload + public var storage: OpenClawStorageStatusPayload + public var 
network: OpenClawNetworkStatusPayload + public var uptimeSeconds: Double + + public init( + battery: OpenClawBatteryStatusPayload, + thermal: OpenClawThermalStatusPayload, + storage: OpenClawStorageStatusPayload, + network: OpenClawNetworkStatusPayload, + uptimeSeconds: Double) + { + self.battery = battery + self.thermal = thermal + self.storage = storage + self.network = network + self.uptimeSeconds = uptimeSeconds + } +} + +public struct OpenClawDeviceInfoPayload: Codable, Sendable, Equatable { + public var deviceName: String + public var modelIdentifier: String + public var systemName: String + public var systemVersion: String + public var appVersion: String + public var appBuild: String + public var locale: String + + public init( + deviceName: String, + modelIdentifier: String, + systemName: String, + systemVersion: String, + appVersion: String, + appBuild: String, + locale: String) + { + self.deviceName = deviceName + self.modelIdentifier = modelIdentifier + self.systemName = systemName + self.systemVersion = systemVersion + self.appVersion = appVersion + self.appBuild = appBuild + self.locale = locale + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift index 0b2e70471c..a255fc7a81 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayChannel.swift @@ -72,6 +72,10 @@ public struct GatewayConnectOptions: Sendable { public var clientId: String public var clientMode: String public var clientDisplayName: String? + // When false, the connection omits the signed device identity payload. + // This is useful for secondary "operator" connections where the shared gateway token + // should authorize without triggering device pairing flows. 
+ public var includeDeviceIdentity: Bool public init( role: String, @@ -81,7 +85,8 @@ public struct GatewayConnectOptions: Sendable { permissions: [String: Bool], clientId: String, clientMode: String, - clientDisplayName: String?) + clientDisplayName: String?, + includeDeviceIdentity: Bool = true) { self.role = role self.scopes = scopes @@ -91,6 +96,7 @@ public struct GatewayConnectOptions: Sendable { self.clientId = clientId self.clientMode = clientMode self.clientDisplayName = clientDisplayName + self.includeDeviceIdentity = includeDeviceIdentity } } @@ -128,7 +134,7 @@ public actor GatewayChannelActor { private let decoder = JSONDecoder() private let encoder = JSONEncoder() private let connectTimeoutSeconds: Double = 6 - private let connectChallengeTimeoutSeconds: Double = 0.75 + private let connectChallengeTimeoutSeconds: Double = 3.0 private var watchdogTask: Task? private var tickTask: Task? private let defaultRequestTimeoutMs: Double = 15000 @@ -307,9 +313,15 @@ public actor GatewayChannelActor { if !options.permissions.isEmpty { params["permissions"] = ProtoAnyCodable(options.permissions) } - let identity = DeviceIdentityStore.loadOrCreate() - let storedToken = DeviceAuthStore.loadToken(deviceId: identity.deviceId, role: role)?.token - let authToken = storedToken ?? self.token + let includeDeviceIdentity = options.includeDeviceIdentity + let identity = includeDeviceIdentity ? DeviceIdentityStore.loadOrCreate() : nil + let storedToken = + (includeDeviceIdentity && identity != nil) + ? DeviceAuthStore.loadToken(deviceId: identity!.deviceId, role: role)?.token + : nil + // If we're not sending a device identity, a device token can't be validated server-side. + // In that mode we always use the shared gateway token/password. + let authToken = includeDeviceIdentity ? (storedToken ?? 
self.token) : self.token let authSource: GatewayAuthSource if storedToken != nil { authSource = .deviceToken @@ -322,7 +334,7 @@ public actor GatewayChannelActor { } self.lastAuthSource = authSource self.logger.info("gateway connect auth=\(authSource.rawValue, privacy: .public)") - let canFallbackToShared = storedToken != nil && self.token != nil + let canFallbackToShared = includeDeviceIdentity && storedToken != nil && self.token != nil if let authToken { params["auth"] = ProtoAnyCodable(["token": ProtoAnyCodable(authToken)]) } else if let password = self.password { @@ -333,7 +345,7 @@ public actor GatewayChannelActor { let scopesValue = scopes.joined(separator: ",") var payloadParts = [ connectNonce == nil ? "v1" : "v2", - identity.deviceId, + identity?.deviceId ?? "", clientId, clientMode, role, @@ -345,18 +357,20 @@ public actor GatewayChannelActor { payloadParts.append(connectNonce) } let payload = payloadParts.joined(separator: "|") - if let signature = DeviceIdentityStore.signPayload(payload, identity: identity), - let publicKey = DeviceIdentityStore.publicKeyBase64Url(identity) { - var device: [String: ProtoAnyCodable] = [ - "id": ProtoAnyCodable(identity.deviceId), - "publicKey": ProtoAnyCodable(publicKey), - "signature": ProtoAnyCodable(signature), - "signedAt": ProtoAnyCodable(signedAtMs), - ] - if let connectNonce { - device["nonce"] = ProtoAnyCodable(connectNonce) + if includeDeviceIdentity, let identity { + if let signature = DeviceIdentityStore.signPayload(payload, identity: identity), + let publicKey = DeviceIdentityStore.publicKeyBase64Url(identity) { + var device: [String: ProtoAnyCodable] = [ + "id": ProtoAnyCodable(identity.deviceId), + "publicKey": ProtoAnyCodable(publicKey), + "signature": ProtoAnyCodable(signature), + "signedAt": ProtoAnyCodable(signedAtMs), + ] + if let connectNonce { + device["nonce"] = ProtoAnyCodable(connectNonce) + } + params["device"] = ProtoAnyCodable(device) } - params["device"] = ProtoAnyCodable(device) } let frame = 
RequestFrame( @@ -371,7 +385,9 @@ public actor GatewayChannelActor { try await self.handleConnectResponse(response, identity: identity, role: role) } catch { if canFallbackToShared { - DeviceAuthStore.clearToken(deviceId: identity.deviceId, role: role) + if let identity { + DeviceAuthStore.clearToken(deviceId: identity.deviceId, role: role) + } } throw error } @@ -379,7 +395,7 @@ public actor GatewayChannelActor { private func handleConnectResponse( _ res: ResponseFrame, - identity: DeviceIdentity, + identity: DeviceIdentity?, role: String ) async throws { if res.ok == false { @@ -404,11 +420,13 @@ public actor GatewayChannelActor { let authRole = auth["role"]?.value as? String ?? role let scopes = (auth["scopes"]?.value as? [ProtoAnyCodable])? .compactMap { $0.value as? String } ?? [] - _ = DeviceAuthStore.storeToken( - deviceId: identity.deviceId, - role: authRole, - token: deviceToken, - scopes: scopes) + if let identity { + _ = DeviceAuthStore.storeToken( + deviceId: identity.deviceId, + role: authRole, + token: deviceToken, + scopes: scopes) + } } self.lastTick = Date() self.tickTask?.cancel() @@ -498,7 +516,10 @@ public actor GatewayChannelActor { } }) } catch { - if error is ConnectChallengeError { return nil } + if error is ConnectChallengeError { + self.logger.warning("gateway connect challenge timed out") + return nil + } throw error } } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift index dbc7dba3d6..6311b4632c 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/GatewayNodeSession.swift @@ -21,6 +21,7 @@ public actor GatewayNodeSession { private var activeURL: URL? private var activeToken: String? private var activePassword: String? + private var activeConnectOptionsKey: String? private var connectOptions: GatewayConnectOptions? 
private var onConnected: (@Sendable () async -> Void)? private var onDisconnected: (@Sendable (String) async -> Void)? @@ -103,6 +104,42 @@ public actor GatewayNodeSession { public init() {} + private func connectOptionsKey(_ options: GatewayConnectOptions) -> String { + func sorted(_ values: [String]) -> String { + values.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .sorted() + .joined(separator: ",") + } + let role = options.role.trimmingCharacters(in: .whitespacesAndNewlines) + let scopes = sorted(options.scopes) + let caps = sorted(options.caps) + let commands = sorted(options.commands) + let clientId = options.clientId.trimmingCharacters(in: .whitespacesAndNewlines) + let clientMode = options.clientMode.trimmingCharacters(in: .whitespacesAndNewlines) + let clientDisplayName = (options.clientDisplayName ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + let includeDeviceIdentity = options.includeDeviceIdentity ? "1" : "0" + let permissions = options.permissions + .map { key, value in + let trimmed = key.trimmingCharacters(in: .whitespacesAndNewlines) + return "\(trimmed)=\(value ? 
"1" : "0")" + } + .sorted() + .joined(separator: ",") + + return [ + role, + scopes, + caps, + commands, + clientId, + clientMode, + clientDisplayName, + includeDeviceIdentity, + permissions, + ].joined(separator: "|") + } + public func connect( url: URL, token: String?, @@ -113,9 +150,11 @@ public actor GatewayNodeSession { onDisconnected: @escaping @Sendable (String) async -> Void, onInvoke: @escaping @Sendable (BridgeInvokeRequest) async -> BridgeInvokeResponse ) async throws { + let nextOptionsKey = self.connectOptionsKey(connectOptions) let shouldReconnect = self.activeURL != url || self.activeToken != token || self.activePassword != password || + self.activeConnectOptionsKey != nextOptionsKey || self.channel == nil self.connectOptions = connectOptions @@ -138,12 +177,13 @@ public actor GatewayNodeSession { }, connectOptions: connectOptions, disconnectHandler: { [weak self] reason in - await self?.onDisconnected?(reason) + await self?.handleChannelDisconnected(reason) }) self.channel = channel self.activeURL = url self.activeToken = token self.activePassword = password + self.activeConnectOptionsKey = nextOptionsKey } guard let channel = self.channel else { @@ -157,7 +197,6 @@ public actor GatewayNodeSession { _ = await self.waitForSnapshot(timeoutMs: 500) await self.notifyConnectedIfNeeded() } catch { - await onDisconnected(error.localizedDescription) throw error } } @@ -168,6 +207,7 @@ public actor GatewayNodeSession { self.activeURL = nil self.activeToken = nil self.activePassword = nil + self.activeConnectOptionsKey = nil self.resetConnectionState() } @@ -249,6 +289,13 @@ public actor GatewayNodeSession { } } + private func handleChannelDisconnected(_ reason: String) async { + // The underlying channel can auto-reconnect; resetting state here ensures we surface a fresh + // onConnected callback once a new snapshot arrives after reconnect. 
+ self.resetConnectionState() + await self.onDisconnected?(reason) + } + private func markSnapshotReceived() { self.snapshotReceived = true if !self.snapshotWaiters.isEmpty { diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/MotionCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/MotionCommands.swift new file mode 100644 index 0000000000..ab487bfd00 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/MotionCommands.swift @@ -0,0 +1,95 @@ +import Foundation + +public enum OpenClawMotionCommand: String, Codable, Sendable { + case activity = "motion.activity" + case pedometer = "motion.pedometer" +} + +public struct OpenClawMotionActivityParams: Codable, Sendable, Equatable { + public var startISO: String? + public var endISO: String? + public var limit: Int? + + public init(startISO: String? = nil, endISO: String? = nil, limit: Int? = nil) { + self.startISO = startISO + self.endISO = endISO + self.limit = limit + } +} + +public struct OpenClawMotionActivityEntry: Codable, Sendable, Equatable { + public var startISO: String + public var endISO: String + public var confidence: String + public var isWalking: Bool + public var isRunning: Bool + public var isCycling: Bool + public var isAutomotive: Bool + public var isStationary: Bool + public var isUnknown: Bool + + public init( + startISO: String, + endISO: String, + confidence: String, + isWalking: Bool, + isRunning: Bool, + isCycling: Bool, + isAutomotive: Bool, + isStationary: Bool, + isUnknown: Bool) + { + self.startISO = startISO + self.endISO = endISO + self.confidence = confidence + self.isWalking = isWalking + self.isRunning = isRunning + self.isCycling = isCycling + self.isAutomotive = isAutomotive + self.isStationary = isStationary + self.isUnknown = isUnknown + } +} + +public struct OpenClawMotionActivityPayload: Codable, Sendable, Equatable { + public var activities: [OpenClawMotionActivityEntry] + + public init(activities: [OpenClawMotionActivityEntry]) { + self.activities = 
activities + } +} + +public struct OpenClawPedometerParams: Codable, Sendable, Equatable { + public var startISO: String? + public var endISO: String? + + public init(startISO: String? = nil, endISO: String? = nil) { + self.startISO = startISO + self.endISO = endISO + } +} + +public struct OpenClawPedometerPayload: Codable, Sendable, Equatable { + public var startISO: String + public var endISO: String + public var steps: Int? + public var distanceMeters: Double? + public var floorsAscended: Int? + public var floorsDescended: Int? + + public init( + startISO: String, + endISO: String, + steps: Int?, + distanceMeters: Double?, + floorsAscended: Int?, + floorsDescended: Int?) + { + self.startISO = startISO + self.endISO = endISO + self.steps = steps + self.distanceMeters = distanceMeters + self.floorsAscended = floorsAscended + self.floorsDescended = floorsDescended + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/PhotosCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/PhotosCommands.swift new file mode 100644 index 0000000000..8d22f5d279 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/PhotosCommands.swift @@ -0,0 +1,41 @@ +import Foundation + +public enum OpenClawPhotosCommand: String, Codable, Sendable { + case latest = "photos.latest" +} + +public struct OpenClawPhotosLatestParams: Codable, Sendable, Equatable { + public var limit: Int? + public var maxWidth: Int? + public var quality: Double? + + public init(limit: Int? = nil, maxWidth: Int? = nil, quality: Double? = nil) { + self.limit = limit + self.maxWidth = maxWidth + self.quality = quality + } +} + +public struct OpenClawPhotoPayload: Codable, Sendable, Equatable { + public var format: String + public var base64: String + public var width: Int + public var height: Int + public var createdAt: String? + + public init(format: String, base64: String, width: Int, height: Int, createdAt: String? 
= nil) { + self.format = format + self.base64 = base64 + self.width = width + self.height = height + self.createdAt = createdAt + } +} + +public struct OpenClawPhotosLatestPayload: Codable, Sendable, Equatable { + public var photos: [OpenClawPhotoPayload] + + public init(photos: [OpenClawPhotoPayload]) { + self.photos = photos + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/RemindersCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/RemindersCommands.swift new file mode 100644 index 0000000000..ac275d8036 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/RemindersCommands.swift @@ -0,0 +1,82 @@ +import Foundation + +public enum OpenClawRemindersCommand: String, Codable, Sendable { + case list = "reminders.list" + case add = "reminders.add" +} + +public enum OpenClawReminderStatusFilter: String, Codable, Sendable { + case incomplete + case completed + case all +} + +public struct OpenClawRemindersListParams: Codable, Sendable, Equatable { + public var status: OpenClawReminderStatusFilter? + public var limit: Int? + + public init(status: OpenClawReminderStatusFilter? = nil, limit: Int? = nil) { + self.status = status + self.limit = limit + } +} + +public struct OpenClawRemindersAddParams: Codable, Sendable, Equatable { + public var title: String + public var dueISO: String? + public var notes: String? + public var listId: String? + public var listName: String? + + public init( + title: String, + dueISO: String? = nil, + notes: String? = nil, + listId: String? = nil, + listName: String? = nil) + { + self.title = title + self.dueISO = dueISO + self.notes = notes + self.listId = listId + self.listName = listName + } +} + +public struct OpenClawReminderPayload: Codable, Sendable, Equatable { + public var identifier: String + public var title: String + public var dueISO: String? + public var completed: Bool + public var listName: String? + + public init( + identifier: String, + title: String, + dueISO: String? 
= nil, + completed: Bool, + listName: String? = nil) + { + self.identifier = identifier + self.title = title + self.dueISO = dueISO + self.completed = completed + self.listName = listName + } +} + +public struct OpenClawRemindersListPayload: Codable, Sendable, Equatable { + public var reminders: [OpenClawReminderPayload] + + public init(reminders: [OpenClawReminderPayload]) { + self.reminders = reminders + } +} + +public struct OpenClawRemindersAddPayload: Codable, Sendable, Equatable { + public var reminder: OpenClawReminderPayload + + public init(reminder: OpenClawReminderPayload) { + self.reminder = reminder + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkCommands.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkCommands.swift new file mode 100644 index 0000000000..755fc97a98 --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkCommands.swift @@ -0,0 +1,28 @@ +import Foundation + +public enum OpenClawTalkCommand: String, Codable, Sendable { + case pttStart = "talk.ptt.start" + case pttStop = "talk.ptt.stop" + case pttCancel = "talk.ptt.cancel" + case pttOnce = "talk.ptt.once" +} + +public struct OpenClawTalkPTTStartPayload: Codable, Sendable, Equatable { + public var captureId: String + + public init(captureId: String) { + self.captureId = captureId + } +} + +public struct OpenClawTalkPTTStopPayload: Codable, Sendable, Equatable { + public var captureId: String + public var transcript: String? + public var status: String + + public init(captureId: String, transcript: String?, status: String) { + self.captureId = captureId + self.transcript = transcript + self.status = status + } +}