mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-28 16:38:17 -05:00
chore: improvements
This commit is contained in:
@@ -34,3 +34,6 @@ NEXT_PUBLIC_PREVIEW_STEALING_DEV=
|
||||
# PostHog Analytics
|
||||
NEXT_PUBLIC_POSTHOG_KEY=
|
||||
NEXT_PUBLIC_POSTHOG_HOST=https://eu.i.posthog.com
|
||||
|
||||
# OpenAI (for voice transcription)
|
||||
OPENAI_API_KEY=
|
||||
|
||||
@@ -73,9 +73,9 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) {
|
||||
};
|
||||
|
||||
/**
 * Restart pagination from the first page.
 *
 * Only the offset is rewound. The sessions that were already accumulated (and
 * the known total) are intentionally left in place so the list does not go
 * blank while the first page is being refetched.
 *
 * NOTE(review): this relies on the data-loading effect (outside this excerpt)
 * replacing, rather than appending to, the accumulated sessions when a
 * response for offset 0 arrives - confirm before merging.
 */
const reset = () => {
  // Only reset the offset - keep existing sessions visible during refetch.
  // The effect will replace sessions when new data arrives at offset 0.
  setOffset(0);
};
|
||||
|
||||
return {
|
||||
|
||||
64
autogpt_platform/frontend/src/app/api/transcribe/route.ts
Normal file
64
autogpt_platform/frontend/src/app/api/transcribe/route.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
/** Upload ceiling in bytes: 25MB - Whisper's limit. */
const MAX_FILE_SIZE = 25 * 1024 ** 2;

/** OpenAI endpoint that receives the audio file and returns its transcription. */
const WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions";
|
||||
|
||||
/** File extension to advertise upstream for each audio container we may receive. */
const AUDIO_EXTENSION_BY_MIME: Record<string, string> = {
  "audio/webm": "webm",
  "video/webm": "webm",
  "audio/mp4": "mp4",
  "video/mp4": "mp4",
  "audio/m4a": "m4a",
  "audio/x-m4a": "m4a",
  "audio/mpeg": "mp3",
  "audio/mp3": "mp3",
  "audio/wav": "wav",
  "audio/x-wav": "wav",
  "audio/wave": "wav",
  "audio/ogg": "ogg",
  "audio/flac": "flac",
  "audio/x-flac": "flac",
};

/**
 * Build the filename sent upstream for an uploaded recording.
 *
 * The browser may record in a container other than WebM (the client falls back
 * to `audio/mp4`), so the extension is derived from the blob's MIME type
 * instead of always claiming `.webm`. Unknown or missing types keep the
 * previous `recording.webm` name.
 */
function getUploadFileName(mimeType: string): string {
  // MediaRecorder can report parameters, e.g. "audio/webm;codecs=opus".
  const essence = (mimeType.split(";", 1)[0] ?? "").trim().toLowerCase();
  const extension = AUDIO_EXTENSION_BY_MIME[essence] ?? "webm";
  return `recording.${extension}`;
}

/**
 * POST /api/transcribe
 *
 * Accepts multipart form data with an `audio` file, forwards it to the OpenAI
 * transcription endpoint and responds with `{ text }`.
 *
 * Error responses are JSON objects of the form `{ error: string }`:
 * - 401 when `OPENAI_API_KEY` is not set,
 * - 400 when the `audio` field is missing, not a file, or empty,
 * - 413 when the file exceeds `MAX_FILE_SIZE`,
 * - the upstream status when the transcription request is rejected,
 * - 500 for any unexpected failure.
 *
 * NOTE(review): nothing in this handler authenticates the caller, so anyone
 * who can reach the route can spend the configured OpenAI key - confirm that
 * middleware protects it.
 */
export async function POST(request: NextRequest) {
  const apiKey = process.env.OPENAI_API_KEY;

  if (!apiKey) {
    // NOTE(review): this is a server configuration problem, not a client
    // authentication failure; a 5xx status would describe it better. Kept at
    // 401 so existing callers observe the same response.
    return NextResponse.json(
      { error: "OpenAI API key not configured" },
      { status: 401 },
    );
  }

  try {
    const formData = await request.formData();
    const audioFile = formData.get("audio");

    if (!audioFile || !(audioFile instanceof Blob)) {
      return NextResponse.json(
        { error: "No audio file provided" },
        { status: 400 },
      );
    }

    // An empty recording cannot be transcribed; fail fast instead of spending
    // an upstream request on it.
    if (audioFile.size === 0) {
      return NextResponse.json(
        { error: "Audio file is empty" },
        { status: 400 },
      );
    }

    if (audioFile.size > MAX_FILE_SIZE) {
      return NextResponse.json(
        { error: "File too large. Maximum size is 25MB." },
        { status: 413 },
      );
    }

    const whisperFormData = new FormData();
    whisperFormData.append(
      "file",
      audioFile,
      getUploadFileName(audioFile.type),
    );
    whisperFormData.append("model", "whisper-1");

    const response = await fetch(WHISPER_API_URL, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
      body: whisperFormData,
    });

    if (!response.ok) {
      // The error body may not be JSON; fall back to an empty object so the
      // generic message below is used.
      const errorData = await response.json().catch(() => ({}));
      console.error("Whisper API error:", errorData);
      return NextResponse.json(
        { error: errorData.error?.message || "Transcription failed" },
        { status: response.status },
      );
    }

    const result = await response.json();
    return NextResponse.json({ text: result.text });
  } catch (error) {
    console.error("Transcription error:", error);
    return NextResponse.json(
      { error: "Failed to process audio" },
      { status: 500 },
    );
  }
}
|
||||
@@ -1,7 +1,21 @@
|
||||
import { Button } from "@/components/atoms/Button/Button";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { ArrowUpIcon, StopIcon } from "@phosphor-icons/react";
|
||||
import {
|
||||
ArrowUpIcon,
|
||||
CircleNotchIcon,
|
||||
MicrophoneIcon,
|
||||
StopIcon,
|
||||
} from "@phosphor-icons/react";
|
||||
import { KeyboardEvent, useCallback } from "react";
|
||||
import { useChatInput } from "./useChatInput";
|
||||
import { useVoiceRecording } from "./useVoiceRecording";
|
||||
|
||||
/**
 * Format a duration as `m:ss` (minutes are not padded, seconds always use two
 * digits), e.g. 65000 -> "1:05".
 *
 * @param ms Elapsed time in milliseconds.
 * @returns The formatted duration; negative or non-finite input is shown as "0:00".
 */
function formatElapsedTime(ms: number): string {
  // A negative value (e.g. the wall clock moved backwards while recording) or
  // NaN/Infinity would otherwise render as "-1:-1" / "NaN:NaN".
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}:${String(seconds).padStart(2, "0")}`;
}
|
||||
|
||||
export interface Props {
|
||||
onSend: (message: string) => void;
|
||||
@@ -21,13 +35,60 @@ export function ChatInput({
|
||||
className,
|
||||
}: Props) {
|
||||
const inputId = "chat-input";
|
||||
const { value, handleKeyDown, handleSubmit, handleChange, hasMultipleLines } =
|
||||
useChatInput({
|
||||
onSend,
|
||||
disabled: disabled || isStreaming,
|
||||
maxRows: 4,
|
||||
inputId,
|
||||
});
|
||||
const {
|
||||
value,
|
||||
setValue,
|
||||
handleKeyDown: baseHandleKeyDown,
|
||||
handleSubmit,
|
||||
handleChange,
|
||||
hasMultipleLines,
|
||||
} = useChatInput({
|
||||
onSend,
|
||||
disabled: disabled || isStreaming,
|
||||
maxRows: 4,
|
||||
inputId,
|
||||
});
|
||||
|
||||
// Merge transcribed speech into the input: when the (trimmed) input already
// has content the new text is appended after a single space, otherwise the
// new text becomes the whole value.
const handleTranscription = useCallback(
  (text: string) => {
    setValue((current) => {
      const existing = current.trim();
      return existing ? `${existing} ${text}` : text;
    });
  },
  [setValue],
);
|
||||
|
||||
const {
|
||||
isRecording,
|
||||
isTranscribing,
|
||||
error: voiceError,
|
||||
elapsedTime,
|
||||
toggleRecording,
|
||||
isSupported: isVoiceSupported,
|
||||
} = useVoiceRecording({
|
||||
onTranscription: handleTranscription,
|
||||
disabled: disabled || isStreaming,
|
||||
});
|
||||
|
||||
// Key handler for the textarea. Space on an empty input acts as a
// push-to-toggle shortcut for voice recording; every other key press is
// delegated to the base chat-input handler.
const handleKeyDown = useCallback(
  (event: KeyboardEvent<HTMLTextAreaElement>) => {
    const isSpaceOnEmptyInput = event.key === " " && !value.trim();

    // Only hijack Space when recording can actually be used; otherwise the
    // keystroke would be swallowed and a microphone error shown to users whose
    // browser has no capture support.
    if (isSpaceOnEmptyInput && isVoiceSupported && !isTranscribing) {
      event.preventDefault();
      // Holding the key auto-repeats keydown; only the initial press toggles,
      // so a long press does not start/stop the recorder repeatedly.
      if (!event.repeat) {
        toggleRecording();
      }
      return;
    }

    baseHandleKeyDown(event);
  },
  [value, isVoiceSupported, isTranscribing, toggleRecording, baseHandleKeyDown],
);
|
||||
|
||||
const showMicButton = isVoiceSupported && !isStreaming;
|
||||
const isInputDisabled = disabled || isStreaming || isTranscribing;
|
||||
|
||||
return (
|
||||
<form onSubmit={handleSubmit} className={cn("relative flex-1", className)}>
|
||||
@@ -35,8 +96,11 @@ export function ChatInput({
|
||||
<div
|
||||
id={`${inputId}-wrapper`}
|
||||
className={cn(
|
||||
"relative overflow-hidden border border-neutral-200 bg-white shadow-sm",
|
||||
"focus-within:border-zinc-400 focus-within:ring-1 focus-within:ring-zinc-400",
|
||||
"relative overflow-hidden border bg-white shadow-sm",
|
||||
"focus-within:ring-1",
|
||||
isRecording
|
||||
? "border-red-400 focus-within:border-red-400 focus-within:ring-red-400"
|
||||
: "border-neutral-200 focus-within:border-zinc-400 focus-within:ring-zinc-400",
|
||||
hasMultipleLines ? "rounded-xlarge" : "rounded-full",
|
||||
)}
|
||||
>
|
||||
@@ -46,48 +110,97 @@ export function ChatInput({
|
||||
value={value}
|
||||
onChange={handleChange}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder={placeholder}
|
||||
disabled={disabled || isStreaming}
|
||||
placeholder={
|
||||
isTranscribing
|
||||
? "Transcribing..."
|
||||
: isRecording
|
||||
? "Recording... Press Space or click mic to stop"
|
||||
: placeholder
|
||||
}
|
||||
disabled={isInputDisabled}
|
||||
rows={1}
|
||||
className={cn(
|
||||
"w-full resize-none overflow-y-auto border-0 bg-transparent text-[1rem] leading-6 text-black",
|
||||
"placeholder:text-zinc-400",
|
||||
"focus:outline-none focus:ring-0",
|
||||
"disabled:text-zinc-500",
|
||||
hasMultipleLines ? "pb-6 pl-4 pr-4 pt-2" : "pb-4 pl-4 pr-14 pt-4",
|
||||
hasMultipleLines
|
||||
? "pb-6 pl-4 pr-4 pt-2"
|
||||
: showMicButton
|
||||
? "pb-4 pl-14 pr-14 pt-4"
|
||||
: "pb-4 pl-4 pr-14 pt-4",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
<span id="chat-input-hint" className="sr-only">
|
||||
Press Enter to send, Shift+Enter for new line
|
||||
Press Enter to send, Shift+Enter for new line, Space to record voice
|
||||
</span>
|
||||
|
||||
{isStreaming ? (
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Stop generating"
|
||||
onClick={onStop}
|
||||
className="absolute bottom-[7px] right-2 border-red-600 bg-red-600 text-white hover:border-red-800 hover:bg-red-800"
|
||||
>
|
||||
<StopIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
type="submit"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Send message"
|
||||
className={cn(
|
||||
"absolute bottom-[7px] right-2 border-zinc-800 bg-zinc-800 text-white hover:border-zinc-900 hover:bg-zinc-900",
|
||||
(disabled || !value.trim()) && "opacity-20",
|
||||
)}
|
||||
disabled={disabled || !value.trim()}
|
||||
>
|
||||
<ArrowUpIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
{voiceError && (
|
||||
<div className="absolute -top-8 left-0 right-0 text-center text-sm text-red-500">
|
||||
{voiceError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{showMicButton && (
|
||||
<div className="absolute bottom-[7px] left-2 flex items-center gap-1">
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label={isRecording ? "Stop recording" : "Start recording"}
|
||||
onClick={toggleRecording}
|
||||
disabled={disabled || isTranscribing}
|
||||
className={cn(
|
||||
isRecording
|
||||
? "animate-pulse border-red-500 bg-red-500 text-white hover:border-red-600 hover:bg-red-600"
|
||||
: isTranscribing
|
||||
? "border-zinc-300 bg-zinc-100 text-zinc-400"
|
||||
: "border-zinc-300 bg-white text-zinc-500 hover:border-zinc-400 hover:bg-zinc-50 hover:text-zinc-700",
|
||||
)}
|
||||
>
|
||||
{isTranscribing ? (
|
||||
<CircleNotchIcon className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<MicrophoneIcon className="h-4 w-4" weight="bold" />
|
||||
)}
|
||||
</Button>
|
||||
{isRecording && (
|
||||
<span className="text-xs font-medium text-red-500">
|
||||
{formatElapsedTime(elapsedTime)}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="absolute bottom-[7px] right-2 flex items-center gap-1">
|
||||
{isStreaming ? (
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Stop generating"
|
||||
onClick={onStop}
|
||||
className="border-red-600 bg-red-600 text-white hover:border-red-800 hover:bg-red-800"
|
||||
>
|
||||
<StopIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
type="submit"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Send message"
|
||||
className={cn(
|
||||
"border-zinc-800 bg-zinc-800 text-white hover:border-zinc-900 hover:bg-zinc-900",
|
||||
(disabled || !value.trim() || isRecording) && "opacity-20",
|
||||
)}
|
||||
disabled={disabled || !value.trim() || isRecording}
|
||||
>
|
||||
<ArrowUpIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
);
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
/** Longest recording allowed, in milliseconds (120 seconds = 2 minutes). */
const MAX_RECORDING_DURATION = 120 * 1000;
|
||||
|
||||
/** Options accepted by the voice-recording hook. */
interface UseVoiceRecordingArgs {
  /** Optional flag; the hook treats a missing value as `false`. */
  disabled?: boolean;
  /** Callback that receives the transcribed text. */
  onTranscription: (text: string) => void;
}
|
||||
|
||||
/** State and controls exposed by the voice-recording hook. */
interface UseVoiceRecordingReturn {
  // --- status ---
  /** Whether audio capture is available in the current environment. */
  isSupported: boolean;
  /** True while a recording is in progress. */
  isRecording: boolean;
  /** True while a transcription request is in flight. */
  isTranscribing: boolean;
  /** Elapsed recording time in milliseconds. */
  elapsedTime: number;
  /** Message of the most recent failure, or `null` when there is none. */
  error: string | null;

  // --- controls ---
  /** Begin a recording. */
  startRecording: () => Promise<void>;
  /** End the recording in progress. */
  stopRecording: () => void;
  /** Start when idle, stop when recording. */
  toggleRecording: () => void;
}
|
||||
|
||||
/**
 * React hook that records microphone audio and has it transcribed.
 *
 * A recording is captured with `MediaRecorder`; when it stops, the audio is
 * posted to `/api/transcribe` and the resulting text is handed to
 * `onTranscription`. Recordings are stopped automatically once they reach
 * `MAX_RECORDING_DURATION`.
 *
 * @param onTranscription Called with the transcribed text when the server
 *   returns a non-empty result.
 * @param disabled When true, `startRecording` does nothing.
 * @returns Recording/transcribing flags, the latest error message, the elapsed
 *   recording time in milliseconds, and start/stop/toggle controls.
 */
export function useVoiceRecording({
  onTranscription,
  disabled = false,
}: UseVoiceRecordingArgs): UseVoiceRecordingReturn {
  const [isRecording, setIsRecording] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [elapsedTime, setElapsedTime] = useState(0);

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  // `ReturnType<typeof setInterval>` is correct for both the DOM and Node typings.
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef<number>(0);
  const streamRef = useRef<MediaStream | null>(null);
  // True while a start request is waiting on the microphone permission prompt.
  const isStartingRef = useRef(false);
  const isMountedRef = useRef(true);

  const isSupported =
    typeof window !== "undefined" &&
    typeof MediaRecorder !== "undefined" &&
    !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);

  // Stop the elapsed-time ticker, if one is running.
  const clearTimer = useCallback(() => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  // Stop every track of the active stream so the microphone is released.
  const releaseStream = useCallback(() => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach((track) => track.stop());
      streamRef.current = null;
    }
  }, []);

  // Tear everything down and discard whatever has been recorded so far.
  const cleanup = useCallback(() => {
    clearTimer();

    const recorder = mediaRecorderRef.current;
    if (recorder) {
      // Detach the handlers first: stopping the recorder (or its tracks) would
      // otherwise still fire `onstop` and transcribe a discarded recording.
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== "inactive") {
        recorder.stop();
      }
    }

    releaseStream();
    mediaRecorderRef.current = null;
    chunksRef.current = [];
    setElapsedTime(0);
  }, [clearTimer, releaseStream]);

  // Upload a finished recording and forward the transcription to the caller.
  const transcribeAudio = useCallback(
    async (audioBlob: Blob) => {
      setIsTranscribing(true);
      setError(null);

      try {
        const formData = new FormData();
        formData.append("audio", audioBlob);

        const response = await fetch("/api/transcribe", {
          method: "POST",
          body: formData,
        });

        if (!response.ok) {
          // The error body may not be JSON; fall back to the generic message.
          const data = await response.json().catch(() => ({}));
          throw new Error(data.error || "Transcription failed");
        }

        const data = await response.json();
        if (data.text) {
          onTranscription(data.text);
        }
      } catch (err) {
        const message =
          err instanceof Error ? err.message : "Transcription failed";
        setError(message);
        console.error("Transcription error:", err);
      } finally {
        setIsTranscribing(false);
      }
    },
    [onTranscription],
  );

  // Decide from the recorder itself (not from `isRecording` state) whether
  // there is something to stop. The auto-stop timer calls this function from a
  // closure created before `isRecording` became true; a state-based guard
  // there always saw `false`, so the duration limit never took effect.
  const stopRecording = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (!recorder) return;

    if (recorder.state !== "inactive") {
      recorder.stop();
    }
    setIsRecording(false);
    clearTimer();
  }, [clearTimer]);

  const startRecording = useCallback(async () => {
    // `isStartingRef` blocks overlapping starts while the permission prompt is
    // open; each extra start would acquire a stream that is never released.
    if (disabled || isRecording || isTranscribing || isStartingRef.current) {
      return;
    }

    isStartingRef.current = true;
    setError(null);
    chunksRef.current = [];

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // The component may have unmounted while the prompt was showing; release
      // the microphone instead of recording for nobody.
      if (!isMountedRef.current) {
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      streamRef.current = stream;

      const mediaRecorder = new MediaRecorder(stream, {
        mimeType: MediaRecorder.isTypeSupported("audio/webm")
          ? "audio/webm"
          : "audio/mp4",
      });
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        const audioBlob = new Blob(chunksRef.current, {
          type: mediaRecorder.mimeType,
        });

        // The recorder can also stop by itself (for example when the input
        // track ends), so reset the recording state here as well.
        clearTimer();
        setIsRecording(false);
        releaseStream();
        if (mediaRecorderRef.current === mediaRecorder) {
          mediaRecorderRef.current = null;
        }

        if (audioBlob.size > 0) {
          await transcribeAudio(audioBlob);
        }
      };

      mediaRecorder.start(1000); // Collect data every second
      startTimeRef.current = Date.now();
      // Start from zero so the previous recording's duration is never shown.
      setElapsedTime(0);
      setIsRecording(true);

      // Elapsed-time ticker; also enforces the maximum recording duration.
      timerRef.current = setInterval(() => {
        const elapsed = Date.now() - startTimeRef.current;
        setElapsedTime(elapsed);

        if (elapsed >= MAX_RECORDING_DURATION) {
          stopRecording();
        }
      }, 100);
    } catch (err) {
      console.error("Failed to start recording:", err);
      if (err instanceof DOMException && err.name === "NotAllowedError") {
        setError("Microphone permission denied");
      } else {
        setError("Failed to access microphone");
      }
      cleanup();
    } finally {
      isStartingRef.current = false;
    }
  }, [
    disabled,
    isRecording,
    isTranscribing,
    stopRecording,
    transcribeAudio,
    clearTimer,
    releaseStream,
    cleanup,
  ]);

  const toggleRecording = useCallback(() => {
    if (isRecording) {
      stopRecording();
    } else {
      // Failures are handled inside startRecording; nothing to await here.
      void startRecording();
    }
  }, [isRecording, startRecording, stopRecording]);

  // Track mount state and release every resource on unmount.
  useEffect(() => {
    isMountedRef.current = true;
    return () => {
      isMountedRef.current = false;
      cleanup();
    };
  }, [cleanup]);

  return {
    isRecording,
    isTranscribing,
    error,
    elapsedTime,
    startRecording,
    stopRecording,
    toggleRecording,
    isSupported,
  };
}
|
||||
Reference in New Issue
Block a user