From 951bfbdb714b2414ba077c7a2c006b99c68c1b31 Mon Sep 17 00:00:00 2001 From: Lluis Agusti Date: Thu, 29 Jan 2026 00:21:03 +0700 Subject: [PATCH] chore: suggestions --- .../Chat/components/ChatInput/ChatInput.tsx | 46 +------- .../components/RecordingIndicator.tsx | 41 +++++++ .../Chat/components/ChatInput/helpers.ts | 6 + ...6-01-28-voice-to-text-chat-input-design.md | 105 ------------------ 4 files changed, 49 insertions(+), 149 deletions(-) create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts delete mode 100644 docs/plans/2026-01-28-voice-to-text-chat-input-design.md diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx index 28418536a2..7af7eea9a9 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx @@ -1,4 +1,5 @@ import { Button } from "@/components/atoms/Button/Button"; +import { useToast } from "@/components/molecules/Toast/use-toast"; import { cn } from "@/lib/utils"; import { ArrowUpIcon, @@ -7,52 +8,9 @@ import { StopIcon, } from "@phosphor-icons/react"; import { KeyboardEvent, useCallback, useEffect } from "react"; +import { RecordingIndicator } from "./components/RecordingIndicator"; import { useChatInput } from "./useChatInput"; import { useVoiceRecording } from "./useVoiceRecording"; -import { useToast } from "@/components/molecules/Toast/use-toast"; - -function formatElapsedTime(ms: number): string { - const seconds = Math.floor(ms / 1000); - const minutes = Math.floor(seconds / 60); - const remainingSeconds = seconds % 60; - return `${minutes}:${remainingSeconds.toString().padStart(2, "0")}`; -} - -function RecordingIndicator({ elapsedTime }: { elapsedTime: number }) { - return ( -
-
- {[0, 1, 2, 3, 4].map((i) => ( -
- ))} -
- - {formatElapsedTime(elapsedTime)} - - -
- ); -} export interface Props { onSend: (message: string) => void; diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx new file mode 100644 index 0000000000..326f390bc7 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/RecordingIndicator.tsx @@ -0,0 +1,41 @@ +import { formatElapsedTime } from "../helpers"; + +type Props = { + elapsedTime: number; +}; + +export function RecordingIndicator({ elapsedTime }: Props) { + return ( +
+
+ {[0, 1, 2, 3, 4].map((i) => ( +
+ ))} +
+ + {formatElapsedTime(elapsedTime)} + + +
+ ); +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts new file mode 100644 index 0000000000..26bae8c9d9 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/helpers.ts @@ -0,0 +1,6 @@ +export function formatElapsedTime(ms: number): string { + const seconds = Math.floor(ms / 1000); + const minutes = Math.floor(seconds / 60); + const remainingSeconds = seconds % 60; + return `${minutes}:${remainingSeconds.toString().padStart(2, "0")}`; +} diff --git a/docs/plans/2026-01-28-voice-to-text-chat-input-design.md b/docs/plans/2026-01-28-voice-to-text-chat-input-design.md deleted file mode 100644 index 649a975c74..0000000000 --- a/docs/plans/2026-01-28-voice-to-text-chat-input-design.md +++ /dev/null @@ -1,105 +0,0 @@ -# Voice-to-Text Chat Input Feature - -## Overview - -Add a microphone button to the ChatInput component that allows users to record voice and transcribe it to text using OpenAI's Whisper API, similar to ChatGPT. - -## Requirements - -- Toggle recording with click (click to start, click to stop) -- Space key triggers recording toggle when input is focused -- Maximum recording duration: 2 minutes -- Use OpenAI Whisper API for transcription -- Frontend API route handles the Whisper call - -## Architecture - -### Components to Create/Modify - -1. **New API route** - `/api/transcribe/route.ts` - - Accepts audio blob (webm) via POST - - Calls OpenAI Whisper API (`whisper-1` model) - - Returns transcribed text - -2. **New hook** - `useVoiceRecording.ts` - - Manages MediaRecorder state - - Handles start/stop recording - - Enforces 2-minute max duration - - Sends audio to transcribe API - -3. **Modified component** - `ChatInput.tsx` - - Add mic button (left of send button) - - Visual feedback during recording - - Space key handler for toggle - -### Environment Variable - -- `OPENAI_API_KEY` (server-side only) - -## Hook Design: `useVoiceRecording.ts` - -### State -- `isRecording` - boolean for recording state -- `isTranscribing` - boolean for API call in progress -- `error` - string for error messages - -### Functions -- `startRecording()` - Request mic permission, start MediaRecorder -- `stopRecording()` - Stop recording, send to API, return text -- `toggleRecording()` - Convenience function for button/space key - -### Constraints -- Auto-stop at 2 minutes with timer -- Show elapsed time during recording -- Cleanup on unmount - -## UI/UX Design - -### Mic Button Placement -- Position: Left of the send button, inside the input wrapper -- Icon: Microphone (Phosphor icons) - -### Visual States - -| State | Mic Button | Input Area | -|-------|-----------|------------| -| Idle | Gray mic icon | Normal | -| Recording | Red pulsing mic + elapsed time | Red border glow | -| Transcribing | Spinner | Disabled, "Transcribing..." placeholder | -| Error | Mic with warning | Inline error | - -### Button Behavior -- Visible when input is empty or has text -- Hidden during streaming (when stop button shows) - -### Accessibility -- `aria-label` updates based on state -- Screen reader announcements -- Space key to toggle when focused - -## API Route: `POST /api/transcribe` - -### Request -- Content-Type: `multipart/form-data` -- Body: `audio` file (webm blob) - -### Response -```json -{ "text": "transcribed text here" } -``` - -### Error Responses -- `400` - No audio file provided -- `401` - Missing API key configuration -- `413` - File too large (> 25MB) -- `500` - Whisper API error - -## File Changes Summary - -| File | Action | -|------|--------| -| `src/app/api/transcribe/route.ts` | Create | -| `src/components/contextual/Chat/components/ChatInput/useVoiceRecording.ts` | Create | -| `src/components/contextual/Chat/components/ChatInput/ChatInput.tsx` | Modify | -| `src/components/contextual/Chat/components/ChatInput/useChatInput.ts` | Minor modify (space key coordination) | -| `.env.default` | Add OPENAI_API_KEY placeholder |