mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
Discord: add voice docs and TTS overrides
This commit is contained in:
@@ -603,6 +603,47 @@ Example:
|
||||
}
|
||||
```
|
||||
|
||||
## Voice channels
|
||||
|
||||
OpenClaw can join Discord voice channels for realtime, continuous conversations. This is separate from voice message attachments.
|
||||
|
||||
Requirements:
|
||||
|
||||
- Enable native commands (`commands.native` or `channels.discord.commands.native`).
|
||||
- Configure `channels.discord.voice`.
|
||||
- The bot needs Connect + Speak permissions in the target voice channel.
|
||||
|
||||
Use the Discord-only native command `/vc join|leave|status` to control voice sessions. The command uses the account's default agent and follows the same allowlist and group policy rules as other Discord commands.
|
||||
|
||||
Auto-join example:
|
||||
|
||||
```json5
|
||||
{
|
||||
channels: {
|
||||
discord: {
|
||||
voice: {
|
||||
enabled: true,
|
||||
autoJoin: [
|
||||
{
|
||||
guildId: "123456789012345678",
|
||||
channelId: "234567890123456789",
|
||||
},
|
||||
],
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: { voice: "alloy" },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `voice.tts` overrides `messages.tts` for voice playback only.
|
||||
- Omit the `voice` block to keep voice support disabled for the account.
|
||||
|
||||
## Voice messages
|
||||
|
||||
Discord voice messages show a waveform preview and require OGG/Opus audio plus metadata. OpenClaw generates the waveform automatically, but it needs `ffmpeg` and `ffprobe` available on the gateway host to inspect and convert audio files.
|
||||
|
||||
@@ -216,6 +216,19 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat
|
||||
accentColor: "#5865F2",
|
||||
},
|
||||
},
|
||||
voice: {
|
||||
enabled: true,
|
||||
autoJoin: [
|
||||
{
|
||||
guildId: "123456789012345678",
|
||||
channelId: "234567890123456789",
|
||||
},
|
||||
],
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: { voice: "alloy" },
|
||||
},
|
||||
},
|
||||
retry: {
|
||||
attempts: 3,
|
||||
minDelayMs: 500,
|
||||
@@ -233,6 +246,7 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat
|
||||
- Bot-authored messages are ignored by default. `allowBots: true` enables them (own messages still filtered).
|
||||
- `maxLinesPerMessage` (default 17) splits tall messages even when under 2000 chars.
|
||||
- `channels.discord.ui.components.accentColor` sets the accent color for Discord components v2 containers.
|
||||
- `channels.discord.voice` enables Discord voice channel conversations and optional auto-join + TTS overrides.
|
||||
|
||||
**Reaction notification modes:** `off` (none), `own` (bot's messages, default), `all` (all messages), `allowlist` (from `guilds.<id>.users` on all messages).
|
||||
|
||||
|
||||
@@ -118,6 +118,7 @@ Notes:
|
||||
- `/allowlist add|remove` requires `commands.config=true` and honors channel `configWrites`.
|
||||
- `/usage` controls the per-response usage footer; `/usage cost` prints a local cost summary from OpenClaw session logs.
|
||||
- `/restart` is disabled by default; set `commands.restart: true` to enable it.
|
||||
- Discord-only native command: `/vc join|leave|status` controls voice channels (requires `channels.discord.voice` and native commands; not available as a text command).
|
||||
- `/verbose` is meant for debugging and extra visibility; keep it **off** in normal use.
|
||||
- `/reasoning` (and `/verbose`) are risky in group settings: they may reveal internal reasoning or tool output you did not intend to expose. Prefer leaving them off, especially in group chats.
|
||||
- **Fast path:** command-only messages from allowlisted senders are handled immediately (bypass queue + model).
|
||||
|
||||
@@ -265,6 +265,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"channels.discord.ui.components.accentColor": "Discord Component Accent Color",
|
||||
"channels.discord.intents.presence": "Discord Presence Intent",
|
||||
"channels.discord.intents.guildMembers": "Discord Guild Members Intent",
|
||||
"channels.discord.voice.enabled": "Discord Voice Enabled",
|
||||
"channels.discord.voice.autoJoin": "Discord Voice Auto-Join",
|
||||
"channels.discord.pluralkit.enabled": "Discord PluralKit Enabled",
|
||||
"channels.discord.pluralkit.token": "Discord PluralKit Token",
|
||||
"channels.discord.activity": "Discord Presence Activity",
|
||||
|
||||
@@ -10,6 +10,7 @@ import type {
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
import type { TtsConfig } from "./types.tts.js";
|
||||
|
||||
export type DiscordDmConfig = {
|
||||
/** If false, ignore all incoming Discord DMs. Default: true. */
|
||||
@@ -103,6 +104,8 @@ export type DiscordVoiceConfig = {
|
||||
enabled?: boolean;
|
||||
/** Voice channels to auto-join on startup. */
|
||||
autoJoin?: DiscordVoiceAutoJoinConfig[];
|
||||
/** Optional TTS overrides for Discord voice output. */
|
||||
tts?: TtsConfig;
|
||||
};
|
||||
|
||||
export type DiscordExecApprovalConfig = {
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
ProviderCommandsSchema,
|
||||
ReplyToModeSchema,
|
||||
RetryConfigSchema,
|
||||
TtsConfigSchema,
|
||||
requireOpenAllowFrom,
|
||||
} from "./zod-schema.core.js";
|
||||
import { sensitive } from "./zod-schema.sensitive.js";
|
||||
@@ -279,6 +280,7 @@ const DiscordVoiceSchema = z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
autoJoin: z.array(DiscordVoiceAutoJoinSchema).optional(),
|
||||
tts: TtsConfigSchema,
|
||||
})
|
||||
.strict()
|
||||
.optional();
|
||||
|
||||
@@ -564,7 +564,6 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
});
|
||||
voiceManagerRef.current = voiceManager;
|
||||
registerDiscordListener(client.listeners, new DiscordVoiceReadyListener(voiceManager));
|
||||
void voiceManager.autoJoin();
|
||||
}
|
||||
|
||||
const messageHandler = createDiscordMessageHandler({
|
||||
|
||||
@@ -18,7 +18,7 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { MsgContext } from "../../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { DiscordAccountConfig } from "../../config/types.js";
|
||||
import type { DiscordAccountConfig, TtsConfig } from "../../config/types.js";
|
||||
import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { resolveAgentDir } from "../../agents/agent-scope.js";
|
||||
import { agentCommand } from "../../commands/agent.js";
|
||||
@@ -33,7 +33,7 @@ import {
|
||||
} from "../../media-understanding/runner.js";
|
||||
import { resolveAgentRoute } from "../../routing/resolve-route.js";
|
||||
import { parseTtsDirectives } from "../../tts/tts-core.js";
|
||||
import { textToSpeech, resolveTtsConfig } from "../../tts/tts.js";
|
||||
import { resolveTtsConfig, textToSpeech, type ResolvedTtsConfig } from "../../tts/tts.js";
|
||||
|
||||
const SAMPLE_RATE = 48_000;
|
||||
const CHANNELS = 2;
|
||||
@@ -64,6 +64,56 @@ type VoiceSessionEntry = {
|
||||
stop: () => void;
|
||||
};
|
||||
|
||||
/**
 * Layers a TTS override on top of a base TTS config.
 *
 * Top-level keys from `override` replace the matching keys in `base`. The
 * nested `modelOverrides`, `elevenlabs` (including `elevenlabs.voiceSettings`),
 * `openai`, and `edge` sections are merged key by key, so a partial override
 * keeps the remaining base settings of that section.
 *
 * When an override is given, those four sections (and
 * `elevenlabs.voiceSettings`) are always present on the result, as empty
 * objects if neither input defines them.
 *
 * @param base - Config to start from.
 * @param override - Optional override. When omitted, `base` itself is returned.
 * @returns The merged config. Neither input object is mutated.
 */
function mergeTtsConfig(base: TtsConfig, override?: TtsConfig): TtsConfig {
  if (!override) {
    return base;
  }
  return {
    ...base,
    ...override,
    // The nested sections are listed after the top-level spreads so that the
    // key-by-key merge wins over the wholesale replacement done by `...override`.
    modelOverrides: {
      ...base.modelOverrides,
      ...override.modelOverrides,
    },
    elevenlabs: {
      ...base.elevenlabs,
      ...override.elevenlabs,
      voiceSettings: {
        ...base.elevenlabs?.voiceSettings,
        ...override.elevenlabs?.voiceSettings,
      },
    },
    openai: {
      ...base.openai,
      ...override.openai,
    },
    edge: {
      ...base.edge,
      ...override.edge,
    },
  };
}
|
||||
|
||||
/**
 * Resolves the TTS settings for a config with an optional TTS override applied.
 *
 * Without an override, the given config is returned untouched together with
 * its resolved TTS settings. With an override, `cfg.messages.tts` is merged
 * with it via `mergeTtsConfig` and placed on a shallow copy of the config;
 * the original `cfg` object is not modified.
 *
 * @param params.cfg - The loaded OpenClaw config.
 * @param params.override - Optional TTS override to layer over `messages.tts`.
 * @returns The config to use (original or copy) and the TTS settings resolved from it.
 */
function resolveVoiceTtsConfig(params: { cfg: OpenClawConfig; override?: TtsConfig }): {
  cfg: OpenClawConfig;
  resolved: ResolvedTtsConfig;
} {
  if (!params.override) {
    return { cfg: params.cfg, resolved: resolveTtsConfig(params.cfg) };
  }
  const base = params.cfg.messages?.tts ?? {};
  const merged = mergeTtsConfig(base, params.override);
  const messages = params.cfg.messages ?? {};
  // Copy only the levels being changed so the caller's config is left intact.
  const cfg = {
    ...params.cfg,
    messages: {
      ...messages,
      tts: merged,
    },
  };
  return { cfg, resolved: resolveTtsConfig(cfg) };
}
|
||||
|
||||
function buildWavBuffer(pcm: Buffer): Buffer {
|
||||
const blockAlign = (CHANNELS * BIT_DEPTH) / 8;
|
||||
const byteRate = SAMPLE_RATE * blockAlign;
|
||||
@@ -461,7 +511,10 @@ export class DiscordVoiceManager {
|
||||
return;
|
||||
}
|
||||
|
||||
const ttsConfig = resolveTtsConfig(this.params.cfg);
|
||||
const { cfg: ttsCfg, resolved: ttsConfig } = resolveVoiceTtsConfig({
|
||||
cfg: this.params.cfg,
|
||||
override: this.params.discordConfig.voice?.tts,
|
||||
});
|
||||
const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides);
|
||||
const speakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
|
||||
if (!speakText) {
|
||||
@@ -470,7 +523,7 @@ export class DiscordVoiceManager {
|
||||
|
||||
const ttsResult = await textToSpeech({
|
||||
text: speakText,
|
||||
cfg: this.params.cfg,
|
||||
cfg: ttsCfg,
|
||||
channel: "discord",
|
||||
overrides: directive.overrides,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user