Discord: add voice docs and TTS overrides

This commit is contained in:
Shadow
2026-02-16 19:06:18 -06:00
parent 3d24b92d85
commit b78b165c38
8 changed files with 120 additions and 5 deletions

View File

@@ -603,6 +603,47 @@ Example:
}
```
## Voice channels
OpenClaw can join Discord voice channels for realtime, continuous conversations. This is separate from voice message attachments.
Requirements:
- Enable native commands (`commands.native` or `channels.discord.commands.native`).
- Configure `channels.discord.voice`.
- The bot needs Connect + Speak permissions in the target voice channel.
Use the Discord-only native command `/vc join|leave|status` to control sessions. The command uses the account default agent and follows the same allowlist and group policy rules as other Discord commands.
Auto-join example:
```json5
{
channels: {
discord: {
voice: {
enabled: true,
autoJoin: [
{
guildId: "123456789012345678",
channelId: "234567890123456789",
},
],
tts: {
provider: "openai",
openai: { voice: "alloy" },
},
},
},
},
}
```
Notes:
- `voice.tts` is merged over `messages.tts` for voice playback only; any field it leaves unset falls back to the value in `messages.tts`.
- Omit the `voice` block to keep voice support disabled for the account.
## Voice messages
Discord voice messages show a waveform preview and require OGG/Opus audio plus metadata. OpenClaw generates the waveform automatically, but it needs `ffmpeg` and `ffprobe` available on the gateway host to inspect and convert audio files.

View File

@@ -216,6 +216,19 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat
accentColor: "#5865F2",
},
},
voice: {
enabled: true,
autoJoin: [
{
guildId: "123456789012345678",
channelId: "234567890123456789",
},
],
tts: {
provider: "openai",
openai: { voice: "alloy" },
},
},
retry: {
attempts: 3,
minDelayMs: 500,
@@ -233,6 +246,7 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat
- Bot-authored messages are ignored by default. `allowBots: true` enables them (own messages still filtered).
- `maxLinesPerMessage` (default 17) splits tall messages even when under 2000 chars.
- `channels.discord.ui.components.accentColor` sets the accent color for Discord components v2 containers.
- `channels.discord.voice` enables Discord voice channel conversations, with optional auto-join and TTS overrides.
**Reaction notification modes:** `off` (none), `own` (bot's messages, default), `all` (all messages), `allowlist` (from `guilds.<id>.users` on all messages).

View File

@@ -118,6 +118,7 @@ Notes:
- `/allowlist add|remove` requires `commands.config=true` and honors channel `configWrites`.
- `/usage` controls the per-response usage footer; `/usage cost` prints a local cost summary from OpenClaw session logs.
- `/restart` is disabled by default; set `commands.restart: true` to enable it.
- Discord-only native command: `/vc join|leave|status` controls voice channels (requires `channels.discord.voice` and native commands; not available as text).
- `/verbose` is meant for debugging and extra visibility; keep it **off** in normal use.
- `/reasoning` (and `/verbose`) are risky in group settings: they may reveal internal reasoning or tool output you did not intend to expose. Prefer leaving them off, especially in group chats.
- **Fast path:** command-only messages from allowlisted senders are handled immediately (bypass queue + model).

View File

@@ -265,6 +265,8 @@ export const FIELD_LABELS: Record<string, string> = {
"channels.discord.ui.components.accentColor": "Discord Component Accent Color",
"channels.discord.intents.presence": "Discord Presence Intent",
"channels.discord.intents.guildMembers": "Discord Guild Members Intent",
"channels.discord.voice.enabled": "Discord Voice Enabled",
"channels.discord.voice.autoJoin": "Discord Voice Auto-Join",
"channels.discord.pluralkit.enabled": "Discord PluralKit Enabled",
"channels.discord.pluralkit.token": "Discord PluralKit Token",
"channels.discord.activity": "Discord Presence Activity",

View File

@@ -10,6 +10,7 @@ import type {
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
import type { TtsConfig } from "./types.tts.js";
export type DiscordDmConfig = {
/** If false, ignore all incoming Discord DMs. Default: true. */
@@ -103,6 +104,8 @@ export type DiscordVoiceConfig = {
enabled?: boolean;
/** Voice channels to auto-join on startup. */
autoJoin?: DiscordVoiceAutoJoinConfig[];
/** Optional TTS overrides for Discord voice output. */
tts?: TtsConfig;
};
export type DiscordExecApprovalConfig = {

View File

@@ -19,6 +19,7 @@ import {
ProviderCommandsSchema,
ReplyToModeSchema,
RetryConfigSchema,
TtsConfigSchema,
requireOpenAllowFrom,
} from "./zod-schema.core.js";
import { sensitive } from "./zod-schema.sensitive.js";
@@ -279,6 +280,7 @@ const DiscordVoiceSchema = z
.object({
enabled: z.boolean().optional(),
autoJoin: z.array(DiscordVoiceAutoJoinSchema).optional(),
tts: TtsConfigSchema,
})
.strict()
.optional();

View File

@@ -564,7 +564,6 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
});
voiceManagerRef.current = voiceManager;
registerDiscordListener(client.listeners, new DiscordVoiceReadyListener(voiceManager));
void voiceManager.autoJoin();
}
const messageHandler = createDiscordMessageHandler({

View File

@@ -18,7 +18,7 @@ import fs from "node:fs/promises";
import path from "node:path";
import type { MsgContext } from "../../auto-reply/templating.js";
import type { OpenClawConfig } from "../../config/config.js";
import type { DiscordAccountConfig } from "../../config/types.js";
import type { DiscordAccountConfig, TtsConfig } from "../../config/types.js";
import type { RuntimeEnv } from "../../runtime.js";
import { resolveAgentDir } from "../../agents/agent-scope.js";
import { agentCommand } from "../../commands/agent.js";
@@ -33,7 +33,7 @@ import {
} from "../../media-understanding/runner.js";
import { resolveAgentRoute } from "../../routing/resolve-route.js";
import { parseTtsDirectives } from "../../tts/tts-core.js";
import { textToSpeech, resolveTtsConfig } from "../../tts/tts.js";
import { resolveTtsConfig, textToSpeech, type ResolvedTtsConfig } from "../../tts/tts.js";
const SAMPLE_RATE = 48_000;
const CHANNELS = 2;
@@ -64,6 +64,56 @@ type VoiceSessionEntry = {
stop: () => void;
};
/**
 * Layers an optional TTS override on top of a base TTS config.
 *
 * Top-level fields are taken from `override` when present, otherwise from
 * `base`. The nested `modelOverrides`, `elevenlabs` (including its
 * `voiceSettings`), `openai`, and `edge` sections are merged key by key rather
 * than replaced wholesale, so an override that sets a single nested key keeps
 * the remaining keys from `base`.
 *
 * @param base - Config to start from; never mutated.
 * @param override - Values that take precedence over `base`.
 * @returns `base` itself when no override is given, otherwise a new object.
 */
function mergeTtsConfig(base: TtsConfig, override?: TtsConfig): TtsConfig {
  if (override) {
    // Build each nested section first so the final literal stays flat.
    const modelOverrides = { ...base.modelOverrides, ...override.modelOverrides };
    const voiceSettings = {
      ...base.elevenlabs?.voiceSettings,
      ...override.elevenlabs?.voiceSettings,
    };
    const elevenlabs = { ...base.elevenlabs, ...override.elevenlabs, voiceSettings };
    const openai = { ...base.openai, ...override.openai };
    const edge = { ...base.edge, ...override.edge };

    // Nested sections go last so they win over the shallow copies above.
    return { ...base, ...override, modelOverrides, elevenlabs, openai, edge };
  }
  return base;
}
/**
 * Produces the config and resolved TTS settings to use for voice playback.
 *
 * Without an override the incoming config is passed through untouched. With an
 * override, a shallow copy of the config is built whose `messages.tts` is the
 * override merged over the existing `messages.tts` (or over an empty config
 * when none is set); the original config object is not modified.
 *
 * @param params.cfg - The full application config.
 * @param params.override - Optional TTS settings to apply on top of `messages.tts`.
 * @returns The config to hand to TTS calls plus the result of `resolveTtsConfig` for it.
 */
function resolveVoiceTtsConfig(params: { cfg: OpenClawConfig; override?: TtsConfig }): {
  cfg: OpenClawConfig;
  resolved: ResolvedTtsConfig;
} {
  const { cfg: rootCfg, override } = params;

  if (override) {
    const mergedTts = mergeTtsConfig(rootCfg.messages?.tts ?? {}, override);
    const effectiveCfg = {
      ...rootCfg,
      messages: {
        ...(rootCfg.messages ?? {}),
        tts: mergedTts,
      },
    };
    return { cfg: effectiveCfg, resolved: resolveTtsConfig(effectiveCfg) };
  }

  // No voice-specific settings: resolve straight from the shared config.
  return { cfg: rootCfg, resolved: resolveTtsConfig(rootCfg) };
}
function buildWavBuffer(pcm: Buffer): Buffer {
const blockAlign = (CHANNELS * BIT_DEPTH) / 8;
const byteRate = SAMPLE_RATE * blockAlign;
@@ -461,7 +511,10 @@ export class DiscordVoiceManager {
return;
}
const ttsConfig = resolveTtsConfig(this.params.cfg);
const { cfg: ttsCfg, resolved: ttsConfig } = resolveVoiceTtsConfig({
cfg: this.params.cfg,
override: this.params.discordConfig.voice?.tts,
});
const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides);
const speakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
if (!speakText) {
@@ -470,7 +523,7 @@ export class DiscordVoiceManager {
const ttsResult = await textToSpeech({
text: speakText,
cfg: this.params.cfg,
cfg: ttsCfg,
channel: "discord",
overrides: directive.overrides,
});