diff --git a/AGENTS.md b/AGENTS.md index cd176f8a2d..202c4c6e02 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,6 @@ See `docs/content/platform/getting-started.md` for setup instructions. - Format Python code with `poetry run format`. - Format frontend code using `pnpm format`. - ## Frontend guidelines: See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: @@ -33,14 +32,17 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: 4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only 5. **Testing**: Add Storybook stories for new components, Playwright for E2E 6. **Code conventions**: Function declarations (not arrow functions) for components/handlers + - Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component - Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts) - Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible - Avoid large hooks, abstract logic into `helpers.ts` files when sensible - Use function declarations for components, arrow functions only for callbacks - No barrel files or `index.ts` re-exports -- Do not use `useCallback` or `useMemo` unless strictly needed - Avoid comments at all times unless the code is very complex +- Do not use `useCallback` or `useMemo` unless asked to optimise a given function +- Do not type hook returns, let Typescript infer as much as possible +- Never type with `any`, if not types available use `unknown` ## Testing @@ -49,22 +51,8 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: Always run the relevant linters and tests before committing. Use conventional commit messages for all commits (e.g. `feat(backend): add API`). - Types: - - feat - - fix - - refactor - - ci - - dx (developer experience) - Scopes: - - platform - - platform/library - - platform/marketplace - - backend - - backend/executor - - frontend - - frontend/library - - frontend/marketplace - - blocks +Types: - feat - fix - refactor - ci - dx (developer experience) +Scopes: - platform - platform/library - platform/marketplace - backend - backend/executor - frontend - frontend/library - frontend/marketplace - blocks ## Pull requests diff --git a/autogpt_platform/CLAUDE.md b/autogpt_platform/CLAUDE.md index 9690178587..a5a588b667 100644 --- a/autogpt_platform/CLAUDE.md +++ b/autogpt_platform/CLAUDE.md @@ -85,17 +85,6 @@ pnpm format pnpm types ``` -**📖 Complete Guide**: See `/frontend/CONTRIBUTING.md` and `/frontend/.cursorrules` for comprehensive frontend patterns. - -**Key Frontend Conventions:** - -- Separate render logic from data/behavior in components -- Use generated API hooks from `@/app/api/__generated__/endpoints/` -- Use function declarations (not arrow functions) for components/handlers -- Use design system components from `src/components/` (atoms, molecules, organisms) -- Only use Phosphor Icons -- Never use `src/components/__legacy__/*` or deprecated `BackendAPI` - ## Architecture Overview ### Backend Architecture @@ -261,14 +250,17 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference: 4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only 5. **Testing**: Add Storybook stories for new components, Playwright for E2E 6. **Code conventions**: Function declarations (not arrow functions) for components/handlers + - Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component - Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts) - Colocate state when possible and avoid creating large components, use sub-components ( local `/components` folder next to the parent component ) when sensible - Avoid large hooks, abstract logic into `helpers.ts` files when sensible - Use function declarations for components, arrow functions only for callbacks - No barrel files or `index.ts` re-exports -- Do not use `useCallback` or `useMemo` unless strictly needed +- Do not use `useCallback` or `useMemo` unless asked to optimise a given function - Avoid comments at all times unless the code is very complex +- Do not type hook returns, let Typescript infer as much as possible +- Never type with `any`, if not types available use `unknown` ### Security Implementation diff --git a/autogpt_platform/frontend/.env.default b/autogpt_platform/frontend/.env.default index af250fb8bf..7a9d81e39e 100644 --- a/autogpt_platform/frontend/.env.default +++ b/autogpt_platform/frontend/.env.default @@ -34,3 +34,6 @@ NEXT_PUBLIC_PREVIEW_STEALING_DEV= # PostHog Analytics NEXT_PUBLIC_POSTHOG_KEY= NEXT_PUBLIC_POSTHOG_HOST=https://eu.i.posthog.com + +# OpenAI (for voice transcription) +OPENAI_API_KEY= diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts index 11ddd937af..61e3e6f37f 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts @@ -73,9 +73,9 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) { }; const reset = () => { + // Only reset the offset - keep existing sessions visible during refetch + // The effect will replace sessions when new data arrives at offset 0 setOffset(0); - setAccumulatedSessions([]); - setTotalCount(null); }; return { diff --git a/autogpt_platform/frontend/src/app/api/transcribe/route.ts b/autogpt_platform/frontend/src/app/api/transcribe/route.ts new file mode 100644 index 0000000000..10c182cdfa --- /dev/null +++ b/autogpt_platform/frontend/src/app/api/transcribe/route.ts @@ -0,0 +1,77 @@ +import { getServerAuthToken } from "@/lib/autogpt-server-api/helpers"; +import { NextRequest, NextResponse } from "next/server"; + +const WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions"; +const MAX_FILE_SIZE = 25 * 1024 * 1024; // 25MB - Whisper's limit + +function getExtensionFromMimeType(mimeType: string): string { + const subtype = mimeType.split("/")[1]?.split(";")[0]; + return subtype || "webm"; +} + +export async function POST(request: NextRequest) { + const token = await getServerAuthToken(); + + if (!token || token === "no-token-found") { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const apiKey = process.env.OPENAI_API_KEY; + + if (!apiKey) { + return NextResponse.json( + { error: "OpenAI API key not configured" }, + { status: 401 }, + ); + } + + try { + const formData = await request.formData(); + const audioFile = formData.get("audio"); + + if (!audioFile || !(audioFile instanceof Blob)) { + return NextResponse.json( + { error: "No audio file provided" }, + { status: 400 }, + ); + } + + if (audioFile.size > MAX_FILE_SIZE) { + return NextResponse.json( + { error: "File too large. Maximum size is 25MB." }, + { status: 413 }, + ); + } + + const ext = getExtensionFromMimeType(audioFile.type); + const whisperFormData = new FormData(); + whisperFormData.append("file", audioFile, `recording.${ext}`); + whisperFormData.append("model", "whisper-1"); + + const response = await fetch(WHISPER_API_URL, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + }, + body: whisperFormData, + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + console.error("Whisper API error:", errorData); + return NextResponse.json( + { error: errorData.error?.message || "Transcription failed" }, + { status: response.status }, + ); + } + + const result = await response.json(); + return NextResponse.json({ text: result.text }); + } catch (error) { + console.error("Transcription error:", error); + return NextResponse.json( + { error: "Failed to process audio" }, + { status: 500 }, + ); + } +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx index c45e8dc250..521f6f6320 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx @@ -1,7 +1,14 @@ import { Button } from "@/components/atoms/Button/Button"; import { cn } from "@/lib/utils"; -import { ArrowUpIcon, StopIcon } from "@phosphor-icons/react"; +import { + ArrowUpIcon, + CircleNotchIcon, + MicrophoneIcon, + StopIcon, +} from "@phosphor-icons/react"; +import { RecordingIndicator } from "./components/RecordingIndicator"; import { useChatInput } from "./useChatInput"; +import { useVoiceRecording } from "./useVoiceRecording"; export interface Props { onSend: (message: string) => void; @@ -21,13 +28,36 @@ export function ChatInput({ className, }: Props) { const inputId = "chat-input"; - const { value, handleKeyDown, handleSubmit, handleChange, hasMultipleLines } = - useChatInput({ - onSend, - disabled: disabled || isStreaming, - maxRows: 4, - inputId, - }); + const { + value, + setValue, + handleKeyDown: baseHandleKeyDown, + handleSubmit, + handleChange, + hasMultipleLines, + } = useChatInput({ + onSend, + disabled: disabled || isStreaming, + maxRows: 4, + inputId, + }); + + const { + isRecording, + isTranscribing, + elapsedTime, + toggleRecording, + handleKeyDown, + showMicButton, + isInputDisabled, + audioStream, + } = useVoiceRecording({ + setValue, + disabled: disabled || isStreaming, + isStreaming, + value, + baseHandleKeyDown, + }); return (
); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx new file mode 100644 index 0000000000..10cbb3fc9f --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/components/AudioWaveform.tsx @@ -0,0 +1,142 @@ +"use client"; + +import { useEffect, useRef, useState } from "react"; + +interface Props { + stream: MediaStream | null; + barCount?: number; + barWidth?: number; + barGap?: number; + barColor?: string; + minBarHeight?: number; + maxBarHeight?: number; +} + +export function AudioWaveform({ + stream, + barCount = 24, + barWidth = 3, + barGap = 2, + barColor = "#ef4444", // red-500 + minBarHeight = 4, + maxBarHeight = 32, +}: Props) { + const [bars, setBars] = useState