mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(frontend/copilot): add text-to-speech and share output actions (#12256)
## Summary

- Add text-to-speech action button to CoPilot assistant messages using the browser Web Speech API
- Add share action button that uses the Web Share API with clipboard fallback
- Replace inline SVG copy icon with Phosphor CopyIcon for consistency

## Linked Issue

SECRT-2052

## Test plan

- [ ] Verify copy button still works
- [ ] Click speaker icon and verify TTS reads aloud
- [ ] Click stop while playing and verify speech stops
- [ ] Click share icon and verify native share or clipboard fallback

Note: This PR should be merged after SECRT-2051 PR

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import {
|
||||
PromptInputBody,
|
||||
PromptInputButton,
|
||||
PromptInputFooter,
|
||||
PromptInputSubmit,
|
||||
PromptInputTextarea,
|
||||
@@ -8,10 +7,10 @@ import {
|
||||
} from "@/components/ai-elements/prompt-input";
|
||||
import { InputGroup } from "@/components/ui/input-group";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { CircleNotchIcon, MicrophoneIcon } from "@phosphor-icons/react";
|
||||
import { ChangeEvent, useEffect, useState } from "react";
|
||||
import { AttachmentMenu } from "./components/AttachmentMenu";
|
||||
import { FileChips } from "./components/FileChips";
|
||||
import { RecordingButton } from "./components/RecordingButton";
|
||||
import { RecordingIndicator } from "./components/RecordingIndicator";
|
||||
import { useChatInput } from "./useChatInput";
|
||||
import { useVoiceRecording } from "./useVoiceRecording";
|
||||
@@ -153,24 +152,13 @@ export function ChatInput({
|
||||
disabled={isBusy}
|
||||
/>
|
||||
{showMicButton && (
|
||||
<PromptInputButton
|
||||
aria-label={isRecording ? "Stop recording" : "Start recording"}
|
||||
onClick={toggleRecording}
|
||||
<RecordingButton
|
||||
isRecording={isRecording}
|
||||
isTranscribing={isTranscribing}
|
||||
isStreaming={isStreaming}
|
||||
disabled={disabled || isTranscribing || isStreaming}
|
||||
className={cn(
|
||||
"size-[2.625rem] rounded-[96px] border border-zinc-300 bg-transparent text-black hover:border-zinc-600 hover:bg-zinc-100",
|
||||
isRecording &&
|
||||
"animate-pulse border-red-500 bg-red-500 text-white hover:border-red-600 hover:bg-red-600",
|
||||
isTranscribing && "bg-zinc-100 text-zinc-400",
|
||||
isStreaming && "opacity-40",
|
||||
)}
|
||||
>
|
||||
{isTranscribing ? (
|
||||
<CircleNotchIcon className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<MicrophoneIcon className="h-4 w-4" weight="bold" />
|
||||
)}
|
||||
</PromptInputButton>
|
||||
onClick={toggleRecording}
|
||||
/>
|
||||
)}
|
||||
</PromptInputTools>
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ export function FileChips({ files, onRemove, isUploading }: Props) {
|
||||
if (files.length === 0) return null;
|
||||
|
||||
return (
|
||||
<div className="flex w-full flex-wrap gap-2 px-3 pb-2 pt-1">
|
||||
<div className="flex w-full flex-wrap gap-2 px-3 pb-2 pt-2">
|
||||
{files.map((file, index) => (
|
||||
<span
|
||||
key={`${file.name}-${file.size}-${index}`}
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
"use client";
|
||||
|
||||
import { Button } from "@/components/atoms/Button/Button";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { CircleNotchIcon, MicrophoneIcon } from "@phosphor-icons/react";
|
||||
|
||||
/** Props accepted by `RecordingButton`. */
interface Props {
  /** Switches the label to "Stop recording" and applies the red pulsing style. */
  isRecording: boolean;
  /** Swaps the microphone glyph for a spinner and applies the muted style. */
  isTranscribing: boolean;
  /** Dims the button (adds `opacity-40`). */
  isStreaming: boolean;
  /** Forwarded to the underlying `Button`; also dims the button. */
  disabled: boolean;
  /** Invoked when the button is clicked. */
  onClick: () => void;
}

/**
 * Round icon button that toggles voice recording.
 *
 * Purely presentational: every piece of state arrives through props and the
 * click is handed straight back to the caller.
 */
export function RecordingButton(props: Props) {
  const { isRecording, isTranscribing, isStreaming, disabled, onClick } = props;

  const accessibleLabel = isRecording ? "Stop recording" : "Start recording";

  // Argument order is significant: later class groups are meant to win over
  // the base palette, so keep base -> disabled -> recording -> transcribing
  // -> streaming.
  const buttonClasses = cn(
    "border-zinc-300 bg-white text-zinc-500 hover:border-zinc-400 hover:bg-zinc-50 hover:text-zinc-700",
    disabled && "opacity-40",
    isRecording &&
      "animate-pulse border-red-500 bg-red-500 text-white hover:border-red-600 hover:bg-red-600",
    isTranscribing && "bg-zinc-100 text-zinc-400",
    isStreaming && "opacity-40",
  );

  // Spinner while transcribing, microphone otherwise.
  const glyph = isTranscribing ? (
    <CircleNotchIcon className="h-4 w-4 animate-spin" weight="bold" />
  ) : (
    <MicrophoneIcon className="h-4 w-4" weight="bold" />
  );

  return (
    <Button
      type="button"
      variant="icon"
      size="icon"
      aria-label={accessibleLabel}
      disabled={disabled}
      onClick={onClick}
      className={buttonClasses}
    >
      {glyph}
    </Button>
  );
}
|
||||
@@ -3,12 +3,19 @@ import {
|
||||
ConversationContent,
|
||||
ConversationScrollButton,
|
||||
} from "@/components/ai-elements/conversation";
|
||||
import { Message, MessageContent } from "@/components/ai-elements/message";
|
||||
import {
|
||||
Message,
|
||||
MessageActions,
|
||||
MessageContent,
|
||||
} from "@/components/ai-elements/message";
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { FileUIPart, UIDataTypes, UIMessage, UITools } from "ai";
|
||||
import { MessageAttachments } from "./components/MessageAttachments";
|
||||
import { MessagePartRenderer } from "./components/MessagePartRenderer";
|
||||
import { ThinkingIndicator } from "./components/ThinkingIndicator";
|
||||
import { CopyButton } from "./components/CopyButton";
|
||||
import { TTSButton } from "./components/TTSButton";
|
||||
import { parseSpecialMarkers } from "./helpers";
|
||||
|
||||
interface Props {
|
||||
messages: UIMessage<unknown, UIDataTypes, UITools>[];
|
||||
@@ -73,6 +80,15 @@ export function ChatMessagesContainer({
|
||||
messageIndex === messages.length - 1 &&
|
||||
message.role === "assistant";
|
||||
|
||||
const isAssistant = message.role === "assistant";
|
||||
|
||||
// Past assistant messages are always done; the last one
|
||||
// is done only when the stream has finished.
|
||||
const isAssistantDone =
|
||||
isAssistant &&
|
||||
(!isLastAssistant ||
|
||||
(status !== "streaming" && status !== "submitted"));
|
||||
|
||||
const fileParts = message.parts.filter(
|
||||
(p): p is FileUIPart => p.type === "file",
|
||||
);
|
||||
@@ -104,6 +120,30 @@ export function ChatMessagesContainer({
|
||||
isUser={message.role === "user"}
|
||||
/>
|
||||
)}
|
||||
{isAssistantDone &&
|
||||
(() => {
|
||||
const textParts = message.parts.filter(
|
||||
(p): p is Extract<typeof p, { type: "text" }> =>
|
||||
p.type === "text",
|
||||
);
|
||||
|
||||
// Hide actions when the message ended with an error or cancellation
|
||||
const lastTextPart = textParts[textParts.length - 1];
|
||||
if (lastTextPart) {
|
||||
const { markerType } = parseSpecialMarkers(
|
||||
lastTextPart.text,
|
||||
);
|
||||
if (markerType === "error") return null;
|
||||
}
|
||||
|
||||
const textContent = textParts.map((p) => p.text).join("\n");
|
||||
return (
|
||||
<MessageActions>
|
||||
<CopyButton text={textContent} />
|
||||
<TTSButton text={textContent} />
|
||||
</MessageActions>
|
||||
);
|
||||
})()}
|
||||
</Message>
|
||||
);
|
||||
})}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
"use client";
|
||||
|
||||
import { MessageAction } from "@/components/ai-elements/message";
|
||||
import { toast } from "@/components/molecules/Toast/use-toast";
|
||||
import { Check, Copy } from "@phosphor-icons/react";
|
||||
import { useState } from "react";
|
||||
|
||||
/** Props accepted by `CopyButton`. */
interface Props {
  /** Text written to the clipboard when the button is clicked. */
  text: string;
}

/**
 * Message action that copies `text` to the clipboard.
 *
 * Renders nothing when `text` is empty or whitespace-only. After a successful
 * copy the icon and tooltip switch to a "copied" state for two seconds; on
 * failure a destructive toast is shown instead.
 */
export function CopyButton({ text }: Props) {
  const [copied, setCopied] = useState(false);

  // Nothing worth copying: hide the action entirely.
  if (text.trim().length === 0) return null;

  const handleCopy = async () => {
    try {
      await navigator.clipboard.writeText(text);
    } catch {
      // Covers both a rejected write and a missing `navigator.clipboard`.
      toast({
        title: "Failed to copy",
        description:
          "Your browser may not support clipboard access, or something went wrong.",
        variant: "destructive",
      });
      return;
    }

    // Flash the confirmation state, then fall back to the default icon.
    setCopied(true);
    setTimeout(() => {
      setCopied(false);
    }, 2000);
  };

  const tooltipLabel = copied ? "Copied!" : "Copy to clipboard";
  const icon = copied ? <Check size={16} /> : <Copy size={16} />;

  return (
    <MessageAction tooltip={tooltipLabel} onClick={handleCopy}>
      {icon}
    </MessageAction>
  );
}
|
||||
@@ -0,0 +1,68 @@
|
||||
"use client";
|
||||
|
||||
import { MessageAction } from "@/components/ai-elements/message";
|
||||
import { SpeakerHigh, Stop } from "@phosphor-icons/react";
|
||||
import { useTextToSpeech } from "@/components/contextual/Chat/components/ChatMessage/useTextToSpeech";
|
||||
import { useMemo } from "react";
|
||||
|
||||
// Unicode emoji pattern (covers most emoji ranges including modifiers and ZWJ sequences)
const EMOJI_RE =
  /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{FE00}-\u{FE0F}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{200D}\u{20E3}\u{E0020}-\u{E007F}]+/gu;

/**
 * Converts a markdown string into plain text suitable for a speech synthesizer.
 *
 * Fenced code blocks are dropped entirely; inline code, links, images and
 * emphasis keep only their visible text; structural markers (headings, rules,
 * block quotes, list bullets/numbers), HTML tags/comments and emoji are
 * removed.
 *
 * Line-anchored markers are stripped BEFORE emphasis so that `*` bullets and
 * `***` rules are never mistaken for emphasis delimiters. Emphasis never
 * spans a newline, and underscore emphasis is not recognised inside a word,
 * so identifiers such as `my_var_name` are left intact.
 *
 * @param md Markdown source text.
 * @returns The text with markdown syntax removed, trimmed, with runs of three
 *   or more newlines collapsed to a single blank line.
 */
function stripMarkdownForSpeech(md: string): string {
  return (
    md
      // Code blocks (``` ... ```)
      .replace(/```[\s\S]*?```/g, "")
      // Inline code
      .replace(/`([^`]*)`/g, "$1")
      // Images ![alt](url) -> alt
      .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
      // Links [text](url) -> text
      .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
      // Horizontal rules; [ \t] rather than \s so adjacent blank lines survive
      .replace(/^[ \t]*[-*_]{3,}[ \t]*$/gm, "")
      // Headings (# ... ######)
      .replace(/^#{1,6}\s+/gm, "")
      // Blockquotes
      .replace(/^>\s?/gm, "")
      // Unordered list markers (same-line whitespace only)
      .replace(/^[ \t]*[-*+][ \t]+/gm, "")
      // Ordered list markers
      .replace(/^[ \t]*\d+\.[ \t]+/gm, "")
      // Bold/italic with asterisks (***text***, **text**, *text*), single line only
      .replace(/\*{1,3}([^*\n]+)\*{1,3}/g, "$1")
      // Bold/italic with underscores; delimiters must not touch a word
      // character on the outside, which keeps snake_case untouched.
      .replace(
        /(^|[^A-Za-z0-9_])_{1,3}([^_\n]+)_{1,3}(?![A-Za-z0-9_])/gm,
        "$1$2",
      )
      // Strikethrough
      .replace(/~~([^~]+)~~/g, "$1")
      // HTML comments
      .replace(/<!--[\s\S]*?-->/g, "")
      // HTML tags; requiring a tag name keeps comparisons like "a < b > c"
      .replace(/<\/?[A-Za-z][^>]*>/g, "")
      // Emoji
      .replace(EMOJI_RE, "")
      // Collapse multiple blank lines
      .replace(/\n{3,}/g, "\n\n")
      .trim()
  );
}
|
||||
|
||||
/** Props accepted by `TTSButton`. */
interface Props {
  /** Markdown text of the message to read aloud. */
  text: string;
}

/**
 * Message action that reads `text` aloud via `useTextToSpeech`.
 *
 * The markdown is reduced to plain text once per `text` value. Nothing is
 * rendered when the hook reports speech synthesis as unsupported or when the
 * stripped text is empty.
 */
export function TTSButton({ text }: Props) {
  const speakableText = useMemo(() => stripMarkdownForSpeech(text), [text]);
  const speech = useTextToSpeech(speakableText);

  const canSpeak = speech.isSupported && speakableText.length > 0;
  if (!canSpeak) return null;

  const speaking = speech.status === "playing";
  const tooltipLabel = speaking ? "Stop reading" : "Read aloud";
  const icon = speaking ? <Stop size={16} /> : <SpeakerHigh size={16} />;

  return (
    <MessageAction tooltip={tooltipLabel} onClick={speech.toggle}>
      {icon}
    </MessageAction>
  );
}
|
||||
@@ -0,0 +1,120 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
|
||||
type TTSStatus = "idle" | "playing" | "paused";
|
||||
|
||||
/**
 * Voice-name fragments in descending order of preference.
 * Earlier entries beat later ones regardless of the order in which the
 * browser lists its voices.
 */
const PREFERRED_VOICES = [
  // macOS high-quality
  "Samantha",
  "Karen",
  "Daniel",
  // Chrome / Android
  "Google UK English Female",
  "Google UK English Male",
  "Google US English",
  // Edge / Windows
  "Microsoft Zira",
  "Microsoft David",
];

/**
 * Chooses the voice to use for speech synthesis.
 *
 * Selection order:
 *  1. the first entry of `PREFERRED_VOICES` contained in some voice's name;
 *  2. the voice flagged as default;
 *  3. the first voice whose language tag starts with "en";
 *  4. the first voice in the list.
 *
 * @returns The selected voice, or `undefined` when the browser reports none.
 */
function pickBestVoice(): SpeechSynthesisVoice | undefined {
  const available = window.speechSynthesis.getVoices();

  // Resolve each preferred fragment to a voice (or undefined) and take the
  // first hit, so ranking follows the preference list.
  const preferred = PREFERRED_VOICES.map((fragment) =>
    available.find((voice) => voice.name.includes(fragment)),
  ).find((voice) => voice !== undefined);
  if (preferred) return preferred;

  const flaggedDefault = available.find((voice) => voice.default);
  if (flaggedDefault) return flaggedDefault;

  const firstEnglish = available.find((voice) => voice.lang.startsWith("en"));
  if (firstEnglish) return firstEnglish;

  return available[0];
}
|
||||
|
||||
/**
 * React hook that reads `text` aloud with the browser's `speechSynthesis`.
 *
 * `speechSynthesis` is a single page-wide queue shared by every instance of
 * this hook, so the hook tracks the utterance it started in `utteranceRef`
 * and only cancels or reacts to that one:
 *  - mounting, unmounting or a `text` change cancels speech only when this
 *    instance owns the active utterance, so it never interrupts another
 *    message that is being read;
 *  - `onend` / `onerror` of an utterance that has been replaced or released
 *    are ignored, so a late event cannot reset the state of a newer playback.
 *
 * Starting playback still cancels whatever is currently speaking, which keeps
 * at most one message audible at a time.
 *
 * @param text Plain text to speak. Changing it stops this instance's playback.
 * @returns `status` ("idle" | "playing" | "paused"), `isSupported` (set after
 *   mount when `speechSynthesis` exists on `window`), and the `play`, `pause`,
 *   `stop` and `toggle` controls.
 */
export function useTextToSpeech(text: string) {
  const [status, setStatus] = useState<TTSStatus>("idle");
  const [isSupported, setIsSupported] = useState(false);
  // The utterance this instance started and still owns; null when it owns none.
  const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null);

  useEffect(() => {
    setIsSupported("speechSynthesis" in window);
    return () => {
      // On unmount, stop only speech that this instance started.
      if (utteranceRef.current) {
        utteranceRef.current = null;
        window.speechSynthesis?.cancel();
      }
    };
  }, []);

  // Reset state when text changes (e.g. navigating between messages)
  useEffect(() => {
    if (utteranceRef.current) {
      // Release ownership first so the cancelled utterance's events are ignored.
      utteranceRef.current = null;
      window.speechSynthesis?.cancel();
    }
    setStatus("idle");
  }, [text]);

  /** Starts reading `text`, or resumes this instance's paused utterance. */
  function play() {
    if (typeof window === "undefined" || !window.speechSynthesis) return;
    const synth = window.speechSynthesis;

    if (status === "paused" && utteranceRef.current) {
      synth.resume();
      setStatus("playing");
      return;
    }

    // Cancel any ongoing speech first. cancel() leaves the synthesizer's
    // paused flag untouched, so clear it explicitly; otherwise an utterance
    // queued after pause() + stop() would never be heard.
    synth.cancel();
    synth.resume();

    const utterance = new SpeechSynthesisUtterance(text);

    const voice = pickBestVoice();
    if (voice) utterance.voice = voice;

    // Shared by end and error: only the utterance we still own may reset state.
    const handleFinished = () => {
      if (utteranceRef.current !== utterance) return;
      utteranceRef.current = null;
      setStatus("idle");
    };
    utterance.onend = handleFinished;
    utterance.onerror = handleFinished;

    utteranceRef.current = utterance;
    synth.speak(utterance);
    setStatus("playing");
  }

  /** Pauses this instance's playback; does nothing unless it is playing. */
  function pause() {
    if (typeof window === "undefined" || !window.speechSynthesis) return;
    // Pausing with nothing playing would strand the hook in "paused".
    if (status !== "playing" || !utteranceRef.current) return;
    window.speechSynthesis.pause();
    setStatus("paused");
  }

  /** Stops this instance's playback and returns to "idle". */
  function stop() {
    if (typeof window === "undefined" || !window.speechSynthesis) return;
    if (utteranceRef.current) {
      utteranceRef.current = null;
      window.speechSynthesis.cancel();
    }
    setStatus("idle");
  }

  /** Stops when playing; otherwise starts (or resumes) playback. */
  function toggle() {
    if (status === "playing") {
      stop();
    } else {
      play();
    }
  }

  return {
    status,
    isSupported,
    play,
    pause,
    stop,
    toggle,
  };
}
|
||||
Reference in New Issue
Block a user