mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-29 08:58:07 -05:00
Merge branch 'dev' into swiftyos/sse-long-running-tasks
This commit is contained in:
24
AGENTS.md
24
AGENTS.md
@@ -16,7 +16,6 @@ See `docs/content/platform/getting-started.md` for setup instructions.
|
||||
- Format Python code with `poetry run format`.
|
||||
- Format frontend code using `pnpm format`.
|
||||
|
||||
|
||||
## Frontend guidelines:
|
||||
|
||||
See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
|
||||
@@ -33,14 +32,17 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
|
||||
4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
|
||||
5. **Testing**: Add Storybook stories for new components, Playwright for E2E
|
||||
6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
|
||||
|
||||
- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
|
||||
- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
|
||||
- Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible
|
||||
- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
|
||||
- Use function declarations for components, arrow functions only for callbacks
|
||||
- No barrel files or `index.ts` re-exports
|
||||
- Do not use `useCallback` or `useMemo` unless strictly needed
|
||||
- Avoid comments at all times unless the code is very complex
|
||||
- Do not use `useCallback` or `useMemo` unless asked to optimise a given function
|
||||
- Do not type hook returns, let TypeScript infer as much as possible
|
||||
- Never type with `any`; if no types are available, use `unknown`
|
||||
|
||||
## Testing
|
||||
|
||||
@@ -49,22 +51,8 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
|
||||
|
||||
Always run the relevant linters and tests before committing.
|
||||
Use conventional commit messages for all commits (e.g. `feat(backend): add API`).
|
||||
Types:
|
||||
- feat
|
||||
- fix
|
||||
- refactor
|
||||
- ci
|
||||
- dx (developer experience)
|
||||
Scopes:
|
||||
- platform
|
||||
- platform/library
|
||||
- platform/marketplace
|
||||
- backend
|
||||
- backend/executor
|
||||
- frontend
|
||||
- frontend/library
|
||||
- frontend/marketplace
|
||||
- blocks
|
||||
Types: - feat - fix - refactor - ci - dx (developer experience)
|
||||
Scopes: - platform - platform/library - platform/marketplace - backend - backend/executor - frontend - frontend/library - frontend/marketplace - blocks
|
||||
|
||||
## Pull requests
|
||||
|
||||
|
||||
@@ -85,17 +85,6 @@ pnpm format
|
||||
pnpm types
|
||||
```
|
||||
|
||||
**📖 Complete Guide**: See `/frontend/CONTRIBUTING.md` and `/frontend/.cursorrules` for comprehensive frontend patterns.
|
||||
|
||||
**Key Frontend Conventions:**
|
||||
|
||||
- Separate render logic from data/behavior in components
|
||||
- Use generated API hooks from `@/app/api/__generated__/endpoints/`
|
||||
- Use function declarations (not arrow functions) for components/handlers
|
||||
- Use design system components from `src/components/` (atoms, molecules, organisms)
|
||||
- Only use Phosphor Icons
|
||||
- Never use `src/components/__legacy__/*` or deprecated `BackendAPI`
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Backend Architecture
|
||||
@@ -261,14 +250,17 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
|
||||
4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
|
||||
5. **Testing**: Add Storybook stories for new components, Playwright for E2E
|
||||
6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
|
||||
|
||||
- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
|
||||
- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
|
||||
- Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible
|
||||
- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
|
||||
- Use function declarations for components, arrow functions only for callbacks
|
||||
- No barrel files or `index.ts` re-exports
|
||||
- Do not use `useCallback` or `useMemo` unless strictly needed
|
||||
- Do not use `useCallback` or `useMemo` unless asked to optimise a given function
|
||||
- Avoid comments at all times unless the code is very complex
|
||||
- Do not type hook returns, let TypeScript infer as much as possible
|
||||
- Never type with `any`; if no types are available, use `unknown`
|
||||
|
||||
### Security Implementation
|
||||
|
||||
|
||||
@@ -34,3 +34,6 @@ NEXT_PUBLIC_PREVIEW_STEALING_DEV=
|
||||
# PostHog Analytics
|
||||
NEXT_PUBLIC_POSTHOG_KEY=
|
||||
NEXT_PUBLIC_POSTHOG_HOST=https://eu.i.posthog.com
|
||||
|
||||
# OpenAI (for voice transcription)
|
||||
OPENAI_API_KEY=
|
||||
|
||||
@@ -73,9 +73,9 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) {
|
||||
};
|
||||
|
||||
const reset = () => {
|
||||
// Only reset the offset - keep existing sessions visible during refetch
|
||||
// The effect will replace sessions when new data arrives at offset 0
|
||||
setOffset(0);
|
||||
setAccumulatedSessions([]);
|
||||
setTotalCount(null);
|
||||
};
|
||||
|
||||
return {
|
||||
|
||||
77
autogpt_platform/frontend/src/app/api/transcribe/route.ts
Normal file
77
autogpt_platform/frontend/src/app/api/transcribe/route.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { getServerAuthToken } from "@/lib/autogpt-server-api/helpers";
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
const WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions";
|
||||
const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB - Whisper's limit
|
||||
|
||||
/**
 * MIME subtypes whose spelling differs from the conventional file extension.
 * The extension ends up in the uploaded file name (`recording.<ext>`), so
 * non-canonical subtypes such as `x-m4a` are mapped to their usual extension.
 * A `Map` is used so that arbitrary subtypes (e.g. "constructor") can never
 * resolve to an inherited object property.
 */
const MIME_SUBTYPE_EXTENSION_ALIASES = new Map<string, string>([
  ["x-m4a", "m4a"],
  ["x-wav", "wav"],
  ["wave", "wav"],
  ["vnd.wave", "wav"],
  ["x-flac", "flac"],
]);

/**
 * Derives a file extension from an audio MIME type.
 *
 * Parameters (e.g. `;codecs=opus`), surrounding whitespace and letter case are
 * ignored. Known subtype aliases are mapped to their canonical extension; any
 * other subtype is returned as-is (lower-cased).
 *
 * @param mimeType - MIME type such as `audio/webm;codecs=opus`.
 * @returns The extension without a leading dot; `"webm"` when no subtype is present.
 */
function getExtensionFromMimeType(mimeType: string): string {
  const subtype = mimeType.split("/")[1]?.split(";")[0]?.trim().toLowerCase();
  if (!subtype) {
    return "webm";
  }
  return MIME_SUBTYPE_EXTENSION_ALIASES.get(subtype) ?? subtype;
}
|
||||
|
||||
/**
 * Transcribes an uploaded audio recording via the OpenAI transcription endpoint.
 *
 * Expects multipart form data with an `audio` file field. Responds with
 * `{ text }` on success or `{ error }` with one of:
 * - 401 when the caller has no valid session token
 * - 400 when no audio file was provided
 * - 413 when the file exceeds `MAX_FILE_SIZE`
 * - 503 when the server has no `OPENAI_API_KEY` configured
 * - 502 when the upstream API rejects the server's credentials
 * - the upstream status for any other upstream failure
 * - 500 when processing fails unexpectedly
 */
export async function POST(request: NextRequest) {
  const token = await getServerAuthToken();

  if (!token || token === "no-token-found") {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const apiKey = process.env.OPENAI_API_KEY;

  if (!apiKey) {
    // A missing key is a deployment problem, not an authentication failure of
    // the caller, so it must not be reported as 401.
    return NextResponse.json(
      { error: "OpenAI API key not configured" },
      { status: 503 },
    );
  }

  try {
    const formData = await request.formData();
    const audioFile = formData.get("audio");

    if (!audioFile || !(audioFile instanceof Blob)) {
      return NextResponse.json(
        { error: "No audio file provided" },
        { status: 400 },
      );
    }

    if (audioFile.size > MAX_FILE_SIZE) {
      return NextResponse.json(
        { error: "File too large. Maximum size is 25MB." },
        { status: 413 },
      );
    }

    const ext = getExtensionFromMimeType(audioFile.type);
    const whisperFormData = new FormData();
    whisperFormData.append("file", audioFile, `recording.${ext}`);
    whisperFormData.append("model", "whisper-1");

    const response = await fetch(WHISPER_API_URL, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
      },
      body: whisperFormData,
    });

    if (!response.ok) {
      const errorData = await response.json().catch(() => ({}));
      console.error("Whisper API error:", errorData);

      // Upstream 401/403 means the *server's* API key was rejected. Forwarding
      // that status would tell the client its own session is invalid, and the
      // upstream message may describe the key, so answer with a generic 502.
      if (response.status === 401 || response.status === 403) {
        return NextResponse.json(
          { error: "Transcription service is unavailable" },
          { status: 502 },
        );
      }

      return NextResponse.json(
        { error: errorData.error?.message || "Transcription failed" },
        { status: response.status },
      );
    }

    const result = await response.json();
    return NextResponse.json({ text: result.text });
  } catch (error) {
    console.error("Transcription error:", error);
    return NextResponse.json(
      { error: "Failed to process audio" },
      { status: 500 },
    );
  }
}
|
||||
@@ -1,7 +1,14 @@
|
||||
import { Button } from "@/components/atoms/Button/Button";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { ArrowUpIcon, StopIcon } from "@phosphor-icons/react";
|
||||
import {
|
||||
ArrowUpIcon,
|
||||
CircleNotchIcon,
|
||||
MicrophoneIcon,
|
||||
StopIcon,
|
||||
} from "@phosphor-icons/react";
|
||||
import { RecordingIndicator } from "./components/RecordingIndicator";
|
||||
import { useChatInput } from "./useChatInput";
|
||||
import { useVoiceRecording } from "./useVoiceRecording";
|
||||
|
||||
export interface Props {
|
||||
onSend: (message: string) => void;
|
||||
@@ -21,13 +28,36 @@ export function ChatInput({
|
||||
className,
|
||||
}: Props) {
|
||||
const inputId = "chat-input";
|
||||
const { value, handleKeyDown, handleSubmit, handleChange, hasMultipleLines } =
|
||||
useChatInput({
|
||||
onSend,
|
||||
disabled: disabled || isStreaming,
|
||||
maxRows: 4,
|
||||
inputId,
|
||||
});
|
||||
const {
|
||||
value,
|
||||
setValue,
|
||||
handleKeyDown: baseHandleKeyDown,
|
||||
handleSubmit,
|
||||
handleChange,
|
||||
hasMultipleLines,
|
||||
} = useChatInput({
|
||||
onSend,
|
||||
disabled: disabled || isStreaming,
|
||||
maxRows: 4,
|
||||
inputId,
|
||||
});
|
||||
|
||||
const {
|
||||
isRecording,
|
||||
isTranscribing,
|
||||
elapsedTime,
|
||||
toggleRecording,
|
||||
handleKeyDown,
|
||||
showMicButton,
|
||||
isInputDisabled,
|
||||
audioStream,
|
||||
} = useVoiceRecording({
|
||||
setValue,
|
||||
disabled: disabled || isStreaming,
|
||||
isStreaming,
|
||||
value,
|
||||
baseHandleKeyDown,
|
||||
});
|
||||
|
||||
return (
|
||||
<form onSubmit={handleSubmit} className={cn("relative flex-1", className)}>
|
||||
@@ -35,8 +65,11 @@ export function ChatInput({
|
||||
<div
|
||||
id={`${inputId}-wrapper`}
|
||||
className={cn(
|
||||
"relative overflow-hidden border border-neutral-200 bg-white shadow-sm",
|
||||
"focus-within:border-zinc-400 focus-within:ring-1 focus-within:ring-zinc-400",
|
||||
"relative overflow-hidden border bg-white shadow-sm",
|
||||
"focus-within:ring-1",
|
||||
isRecording
|
||||
? "border-red-400 focus-within:border-red-400 focus-within:ring-red-400"
|
||||
: "border-neutral-200 focus-within:border-zinc-400 focus-within:ring-zinc-400",
|
||||
hasMultipleLines ? "rounded-xlarge" : "rounded-full",
|
||||
)}
|
||||
>
|
||||
@@ -46,48 +79,94 @@ export function ChatInput({
|
||||
value={value}
|
||||
onChange={handleChange}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder={placeholder}
|
||||
disabled={disabled || isStreaming}
|
||||
placeholder={
|
||||
isTranscribing
|
||||
? "Transcribing..."
|
||||
: isRecording
|
||||
? ""
|
||||
: placeholder
|
||||
}
|
||||
disabled={isInputDisabled}
|
||||
rows={1}
|
||||
className={cn(
|
||||
"w-full resize-none overflow-y-auto border-0 bg-transparent text-[1rem] leading-6 text-black",
|
||||
"placeholder:text-zinc-400",
|
||||
"focus:outline-none focus:ring-0",
|
||||
"disabled:text-zinc-500",
|
||||
hasMultipleLines ? "pb-6 pl-4 pr-4 pt-2" : "pb-4 pl-4 pr-14 pt-4",
|
||||
hasMultipleLines
|
||||
? "pb-6 pl-4 pr-4 pt-2"
|
||||
: showMicButton
|
||||
? "pb-4 pl-14 pr-14 pt-4"
|
||||
: "pb-4 pl-4 pr-14 pt-4",
|
||||
)}
|
||||
/>
|
||||
{isRecording && !value && (
|
||||
<div className="pointer-events-none absolute inset-0 flex items-center justify-center">
|
||||
<RecordingIndicator
|
||||
elapsedTime={elapsedTime}
|
||||
audioStream={audioStream}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<span id="chat-input-hint" className="sr-only">
|
||||
Press Enter to send, Shift+Enter for new line
|
||||
Press Enter to send, Shift+Enter for new line, Space to record voice
|
||||
</span>
|
||||
|
||||
{isStreaming ? (
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Stop generating"
|
||||
onClick={onStop}
|
||||
className="absolute bottom-[7px] right-2 border-red-600 bg-red-600 text-white hover:border-red-800 hover:bg-red-800"
|
||||
>
|
||||
<StopIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
type="submit"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Send message"
|
||||
className={cn(
|
||||
"absolute bottom-[7px] right-2 border-zinc-800 bg-zinc-800 text-white hover:border-zinc-900 hover:bg-zinc-900",
|
||||
(disabled || !value.trim()) && "opacity-20",
|
||||
)}
|
||||
disabled={disabled || !value.trim()}
|
||||
>
|
||||
<ArrowUpIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
{showMicButton && (
|
||||
<div className="absolute bottom-[7px] left-2 flex items-center gap-1">
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label={isRecording ? "Stop recording" : "Start recording"}
|
||||
onClick={toggleRecording}
|
||||
disabled={disabled || isTranscribing}
|
||||
className={cn(
|
||||
isRecording
|
||||
? "animate-pulse border-red-500 bg-red-500 text-white hover:border-red-600 hover:bg-red-600"
|
||||
: isTranscribing
|
||||
? "border-zinc-300 bg-zinc-100 text-zinc-400"
|
||||
: "border-zinc-300 bg-white text-zinc-500 hover:border-zinc-400 hover:bg-zinc-50 hover:text-zinc-700",
|
||||
)}
|
||||
>
|
||||
{isTranscribing ? (
|
||||
<CircleNotchIcon className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<MicrophoneIcon className="h-4 w-4" weight="bold" />
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="absolute bottom-[7px] right-2 flex items-center gap-1">
|
||||
{isStreaming ? (
|
||||
<Button
|
||||
type="button"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Stop generating"
|
||||
onClick={onStop}
|
||||
className="border-red-600 bg-red-600 text-white hover:border-red-800 hover:bg-red-800"
|
||||
>
|
||||
<StopIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
type="submit"
|
||||
variant="icon"
|
||||
size="icon"
|
||||
aria-label="Send message"
|
||||
className={cn(
|
||||
"border-zinc-800 bg-zinc-800 text-white hover:border-zinc-900 hover:bg-zinc-900",
|
||||
(disabled || !value.trim() || isRecording) && "opacity-20",
|
||||
)}
|
||||
disabled={disabled || !value.trim() || isRecording}
|
||||
>
|
||||
<ArrowUpIcon className="h-4 w-4" weight="bold" />
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
);
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
|
||||
interface Props {
  stream: MediaStream | null;
  barCount?: number;
  barWidth?: number;
  barGap?: number;
  barColor?: string;
  minBarHeight?: number;
  maxBarHeight?: number;
}

/**
 * Renders a row of vertically centred bars whose heights follow the live
 * amplitude of `stream`.
 *
 * While `stream` is null every bar rests at `minBarHeight`. With a stream, an
 * analyser node is sampled once per animation frame and each bar shows the
 * peak deviation of its slice of the time-domain buffer, scaled into
 * `[minBarHeight, maxBarHeight]`.
 */
export function AudioWaveform({
  stream,
  barCount = 24,
  barWidth = 3,
  barGap = 2,
  barColor = "#ef4444", // red-500
  minBarHeight = 4,
  maxBarHeight = 32,
}: Props) {
  const [bars, setBars] = useState<number[]>(() =>
    Array(barCount).fill(minBarHeight),
  );
  const analyserRef = useRef<AnalyserNode | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const animationRef = useRef<number | null>(null);

  useEffect(() => {
    if (!stream) {
      setBars(Array(barCount).fill(minBarHeight));
      return;
    }

    // Audio graph: stream -> source -> analyser (nothing is routed to output).
    const context = new AudioContext();
    const analyserNode = context.createAnalyser();
    analyserNode.fftSize = 512;
    analyserNode.smoothingTimeConstant = 0.8;

    const streamSource = context.createMediaStreamSource(stream);
    streamSource.connect(analyserNode);

    audioContextRef.current = context;
    analyserRef.current = analyserNode;
    sourceRef.current = streamSource;

    const samples = new Uint8Array(analyserNode.frequencyBinCount);
    const heightRange = maxBarHeight - minBarHeight;

    function renderFrame() {
      const activeAnalyser = analyserRef.current;
      if (!activeAnalyser) return;

      activeAnalyser.getByteTimeDomainData(samples);

      // Each bar owns a contiguous slice of the time-domain buffer.
      const sliceSize = samples.length / barCount;

      const heights = Array.from({ length: barCount }, (_, barIndex) => {
        const from = Math.floor(barIndex * sliceSize);
        const to = Math.min(
          Math.floor((barIndex + 1) * sliceSize),
          samples.length,
        );

        // Samples are unsigned bytes centred on 128, so the distance from
        // 128 is the amplitude (0-128).
        let peak = 0;
        for (let k = from; k < to; k++) {
          const deviation = Math.abs(samples[k] - 128);
          if (deviation > peak) peak = deviation;
        }

        return minBarHeight + (peak / 128) * heightRange;
      });

      setBars(heights);
      animationRef.current = requestAnimationFrame(renderFrame);
    }

    renderFrame();

    return () => {
      const pendingFrame = animationRef.current;
      if (pendingFrame) {
        cancelAnimationFrame(pendingFrame);
      }
      sourceRef.current?.disconnect();
      audioContextRef.current?.close();
      analyserRef.current = null;
      audioContextRef.current = null;
      sourceRef.current = null;
    };
  }, [stream, barCount, minBarHeight, maxBarHeight]);

  const totalWidth = barCount * barWidth + (barCount - 1) * barGap;
  const containerStyle = {
    width: totalWidth,
    height: maxBarHeight,
    gap: barGap,
  };
  const slotStyle = { width: barWidth, height: maxBarHeight };

  return (
    <div className="flex items-center justify-center" style={containerStyle}>
      {bars.map((height, i) => (
        <div key={i} className="relative" style={slotStyle}>
          <div
            className="absolute left-0 rounded-full transition-[height] duration-75"
            style={{
              width: barWidth,
              height: Math.max(minBarHeight, height),
              top: "50%",
              transform: "translateY(-50%)",
              backgroundColor: barColor,
            }}
          />
        </div>
      ))}
    </div>
  );
}
|
||||
@@ -0,0 +1,26 @@
|
||||
import { formatElapsedTime } from "../helpers";
|
||||
import { AudioWaveform } from "./AudioWaveform";
|
||||
|
||||
interface Props {
  /** Time recorded so far, in milliseconds (formatted as `m:ss`). */
  elapsedTime: number;
  /** Stream visualised by the waveform; null renders idle bars. */
  audioStream: MediaStream | null;
}

/**
 * Shows a live waveform for the current recording next to its elapsed time.
 */
export function RecordingIndicator({ elapsedTime, audioStream }: Props) {
  return (
    <div className="flex items-center gap-3">
      <AudioWaveform
        stream={audioStream}
        barCount={20}
        barWidth={3}
        barGap={2}
        barColor="#ef4444"
        minBarHeight={4}
        maxBarHeight={24}
      />
      <span className="min-w-[3ch] text-sm font-medium text-red-500">
        {formatElapsedTime(elapsedTime)}
      </span>
    </div>
  );
}
|
||||
@@ -0,0 +1,6 @@
|
||||
/**
 * Formats a duration in milliseconds as `m:ss` (e.g. `61000` -> `"1:01"`).
 *
 * Partial seconds are truncated. Negative or non-finite input is treated as
 * zero so the UI never shows values like `-1:-1` or `NaN:NaN`; a negative
 * duration can occur when it is derived from `Date.now()`, which is not
 * monotonic.
 *
 * @param ms - Elapsed time in milliseconds.
 * @returns Minutes (unpadded) and seconds (two digits) separated by a colon.
 */
export function formatElapsedTime(ms: number): string {
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}:${String(seconds).padStart(2, "0")}`;
}
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
useState,
|
||||
} from "react";
|
||||
|
||||
interface UseChatInputArgs {
|
||||
interface Args {
|
||||
onSend: (message: string) => void;
|
||||
disabled?: boolean;
|
||||
maxRows?: number;
|
||||
@@ -18,7 +18,7 @@ export function useChatInput({
|
||||
disabled = false,
|
||||
maxRows = 5,
|
||||
inputId = "chat-input",
|
||||
}: UseChatInputArgs) {
|
||||
}: Args) {
|
||||
const [value, setValue] = useState("");
|
||||
const [hasMultipleLines, setHasMultipleLines] = useState(false);
|
||||
|
||||
|
||||
@@ -0,0 +1,240 @@
|
||||
import { useToast } from "@/components/molecules/Toast/use-toast";
|
||||
import React, {
|
||||
KeyboardEvent,
|
||||
useCallback,
|
||||
useEffect,
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
|
||||
const MAX_RECORDING_DURATION = 2 * 60 * 1000; // 2 minutes in ms
|
||||
|
||||
interface Args {
  setValue: React.Dispatch<React.SetStateAction<string>>;
  disabled?: boolean;
  isStreaming?: boolean;
  value: string;
  baseHandleKeyDown: (event: KeyboardEvent<HTMLTextAreaElement>) => void;
}

/** Stops every track of `stream` so the browser releases the microphone. */
function stopStreamTracks(stream: MediaStream | null) {
  stream?.getTracks().forEach((track) => track.stop());
}

/**
 * Records microphone audio, sends it to `/api/transcribe` and appends the
 * transcription to the chat input value.
 *
 * Behaviour:
 * - Recording stops automatically after `MAX_RECORDING_DURATION`.
 * - Pressing Space in an empty input toggles recording; every other key is
 *   forwarded to `baseHandleKeyDown`.
 * - Failures are stored in `error` and surfaced as a destructive toast.
 * - Unmounting discards an in-progress recording without transcribing it.
 *
 * @returns Recording/transcription state, control callbacks, the wrapped
 * key handler and the active `audioStream` (null when not recording).
 */
export function useVoiceRecording({
  setValue,
  disabled = false,
  isStreaming = false,
  value,
  baseHandleKeyDown,
}: Args) {
  const [isRecording, setIsRecording] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [elapsedTime, setElapsedTime] = useState(0);
  // Held in state (not only a ref) so consumers re-render when it changes.
  const [audioStream, setAudioStream] = useState<MediaStream | null>(null);

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  // `setInterval` returns a number in browsers and a Timeout object under
  // Node typings; ReturnType covers both.
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef<number>(0);
  const streamRef = useRef<MediaStream | null>(null);
  const isRecordingRef = useRef(false);
  // True while getUserMedia is pending, so a second start cannot open (and
  // leak) another stream.
  const isStartingRef = useRef(false);
  const isMountedRef = useRef(true);

  const isSupported =
    typeof window !== "undefined" &&
    !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);

  const clearTimer = useCallback(() => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  // Discards the current recording: detaches the recorder handlers so no
  // transcription is triggered, stops the recorder and releases the microphone.
  const cleanup = useCallback(() => {
    clearTimer();

    const recorder = mediaRecorderRef.current;
    if (recorder) {
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== "inactive") {
        recorder.stop();
      }
    }

    stopStreamTracks(streamRef.current);
    streamRef.current = null;
    mediaRecorderRef.current = null;
    isRecordingRef.current = false;
    setElapsedTime(0);
  }, [clearTimer]);

  const handleTranscription = useCallback(
    (text: string) => {
      setValue((prev) => {
        const trimmedPrev = prev.trim();
        if (trimmedPrev) {
          return `${trimmedPrev} ${text}`;
        }
        return text;
      });
    },
    [setValue],
  );

  const transcribeAudio = useCallback(
    async (audioBlob: Blob) => {
      setIsTranscribing(true);
      setError(null);

      try {
        const formData = new FormData();
        formData.append("audio", audioBlob);

        const response = await fetch("/api/transcribe", {
          method: "POST",
          body: formData,
        });

        if (!response.ok) {
          const data = await response.json().catch(() => ({}));
          throw new Error(data.error || "Transcription failed");
        }

        const data = await response.json();
        if (data.text) {
          handleTranscription(data.text);
        }
      } catch (err) {
        const message =
          err instanceof Error ? err.message : "Transcription failed";
        setError(message);
        console.error("Transcription error:", err);
      } finally {
        setIsTranscribing(false);
      }
    },
    [handleTranscription],
  );

  const stopRecording = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (!recorder || !isRecordingRef.current) return;

    isRecordingRef.current = false;
    if (recorder.state !== "inactive") {
      recorder.stop();
    }
    setIsRecording(false);
    clearTimer();
  }, [clearTimer]);

  const startRecording = useCallback(async () => {
    if (
      disabled ||
      isTranscribing ||
      isRecordingRef.current ||
      isStartingRef.current
    ) {
      return;
    }

    isStartingRef.current = true;
    setError(null);

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // The component may have unmounted while the permission prompt was open.
      if (!isMountedRef.current) {
        stopStreamTracks(stream);
        return;
      }

      streamRef.current = stream;

      // Prefer webm, then mp4; otherwise let the browser pick its default
      // container instead of requesting an unsupported type.
      const mimeType = ["audio/webm", "audio/mp4"].find((candidate) =>
        MediaRecorder.isTypeSupported(candidate),
      );
      const mediaRecorder = mimeType
        ? new MediaRecorder(stream, { mimeType })
        : new MediaRecorder(stream);

      mediaRecorderRef.current = mediaRecorder;

      // Chunks and stream are captured per recording so a late `stop` event
      // from this recorder can never touch a newer recording's data.
      const chunks: Blob[] = [];

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunks.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        const audioBlob = new Blob(chunks, {
          type: mediaRecorder.mimeType,
        });

        stopStreamTracks(stream);
        if (streamRef.current === stream) {
          streamRef.current = null;
          setAudioStream(null);
        }

        // The recorder can stop without stopRecording() being called (for
        // example when the input track ends); bring the UI state back in sync.
        if (mediaRecorderRef.current === mediaRecorder) {
          mediaRecorderRef.current = null;
          if (isRecordingRef.current) {
            isRecordingRef.current = false;
            setIsRecording(false);
            clearTimer();
          }
        }

        if (audioBlob.size > 0) {
          await transcribeAudio(audioBlob);
        }
      };

      mediaRecorder.start(1000); // Collect data every second
      isRecordingRef.current = true;
      startTimeRef.current = Date.now();
      setElapsedTime(0);
      setAudioStream(stream);
      setIsRecording(true);

      timerRef.current = setInterval(() => {
        const elapsed = Date.now() - startTimeRef.current;
        setElapsedTime(elapsed);

        // Auto-stop at max duration
        if (elapsed >= MAX_RECORDING_DURATION) {
          stopRecording();
        }
      }, 100);
    } catch (err) {
      console.error("Failed to start recording:", err);
      if (err instanceof DOMException && err.name === "NotAllowedError") {
        setError("Microphone permission denied");
      } else {
        setError("Failed to access microphone");
      }
      cleanup();
    } finally {
      isStartingRef.current = false;
    }
  }, [
    disabled,
    isTranscribing,
    stopRecording,
    transcribeAudio,
    cleanup,
    clearTimer,
  ]);

  const toggleRecording = useCallback(() => {
    if (isRecording) {
      stopRecording();
    } else {
      // startRecording handles its own errors, so the promise is not awaited.
      void startRecording();
    }
  }, [isRecording, startRecording, stopRecording]);

  const { toast } = useToast();

  useEffect(() => {
    if (error) {
      toast({
        title: "Voice recording failed",
        description: error,
        variant: "destructive",
      });
    }
  }, [error, toast]);

  const handleKeyDown = useCallback(
    (event: KeyboardEvent<HTMLTextAreaElement>) => {
      // Space in an empty input acts as the record shortcut.
      if (event.key === " " && !value.trim() && !isTranscribing) {
        event.preventDefault();
        toggleRecording();
        return;
      }
      baseHandleKeyDown(event);
    },
    [value, isTranscribing, toggleRecording, baseHandleKeyDown],
  );

  const showMicButton = isSupported && !isStreaming;
  const isInputDisabled = disabled || isStreaming || isTranscribing;

  // Track mount state and discard any in-progress recording on unmount.
  useEffect(() => {
    isMountedRef.current = true;
    return () => {
      isMountedRef.current = false;
      cleanup();
    };
  }, [cleanup]);

  return {
    isRecording,
    isTranscribing,
    error,
    elapsedTime,
    startRecording,
    stopRecording,
    toggleRecording,
    isSupported,
    handleKeyDown,
    showMicButton,
    isInputDisabled,
    audioStream,
  };
}
|
||||
Reference in New Issue
Block a user