From a5e4a8e7733996427fcacf54bbd0fbfa9101ae7b Mon Sep 17 00:00:00 2001
From: majdyz
Date: Wed, 15 Apr 2026 12:29:23 +0700
Subject: [PATCH] feat(copilot): standard/advanced model toggle with Opus rate-limit multiplier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a per-request model tier toggle to CoPilot. Users can switch between
Standard (Sonnet) and Advanced (Opus) from the chat input toolbar. Opus
turns consume rate-limit quota 5× faster (matching Anthropic pricing),
so no separate entitlement gate is needed; usage self-limits via rate
limiting.

- Add CopilotLlmModel = Literal["standard", "advanced"] type
- ModelToggleButton (sky-blue, star icon, label only when active)
- localStorage persistence via Key.COPILOT_MODEL
- Backend: model tier passed in request body, resolved to actual model name
- Rate-limit multiplier 5.0 for Opus in record_token_usage (Redis only,
  does not affect PlatformCostLog or cost_usd — those use real API values)
- Reduce claude_agent_max_budget_usd default from $15 to $10
---
 .../backend/api/features/chat/routes.py       |  8 +++-
 .../backend/backend/copilot/config.py         | 11 ++++-
 .../backend/copilot/executor/processor.py     |  1 +
 .../backend/backend/copilot/executor/utils.py |  8 +++-
 .../backend/backend/copilot/rate_limit.py     | 13 +++++-
 .../backend/backend/copilot/sdk/service.py    | 37 +++++++++++++++-
 .../copilot/sdk/service_helpers_test.py       | 42 +++++++++++++++++++
 .../backend/backend/copilot/token_tracking.py |  5 +++
 .../backend/copilot/token_tracking_test.py    |  1 +
 .../components/ChatInput/ChatInput.tsx        | 32 +++++++++++++-
 .../components/ModelToggleButton.tsx          | 38 +++++++++++++++++
 .../__tests__/ModelToggleButton.test.tsx      | 36 ++++++++++++++++
 .../src/app/(platform)/copilot/store.ts       | 18 ++++++++
 .../app/(platform)/copilot/useCopilotPage.ts  |  2 +
 .../(platform)/copilot/useCopilotStream.ts    | 12 ++++--
 .../src/services/storage/local-storage.ts     |  1 +
 16 files changed, 253 insertions(+), 12 deletions(-)
 create mode 100644 autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/components/ModelToggleButton.tsx
 create mode 100644 autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/components/__tests__/ModelToggleButton.test.tsx

diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py
index aa2dc85e15..f8c3e3b804 100644
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -15,7 +15,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from backend.copilot import service as chat_service
 from backend.copilot import stream_registry
-from backend.copilot.config import ChatConfig, CopilotMode
+from backend.copilot.config import ChatConfig, CopilotLlmModel, CopilotMode
 from backend.copilot.db import get_chat_messages_paginated
 from backend.copilot.executor.utils import enqueue_cancel_task, enqueue_copilot_turn
 from backend.copilot.model import (
@@ -139,6 +139,11 @@ class StreamChatRequest(BaseModel):
         description="Autopilot mode: 'fast' for baseline LLM, 'extended_thinking' for Claude Agent SDK. "
         "If None, uses the server default (extended_thinking).",
     )
+    model: CopilotLlmModel | None = Field(
+        default=None,
+        description="Model tier: 'standard' for the default model, 'advanced' for the highest-capability model. "
" + "If None, the server applies per-user LD targeting then falls back to config.", + ) class CreateSessionRequest(BaseModel): @@ -891,6 +896,7 @@ async def stream_chat_post( context=request.context, file_ids=sanitized_file_ids, mode=request.mode, + model=request.model, ) setup_time = (time.perf_counter() - stream_start_time) * 1000 diff --git a/autogpt_platform/backend/backend/copilot/config.py b/autogpt_platform/backend/backend/copilot/config.py index 28fa24f868..f6daed754a 100644 --- a/autogpt_platform/backend/backend/copilot/config.py +++ b/autogpt_platform/backend/backend/copilot/config.py @@ -16,6 +16,13 @@ from backend.util.clients import OPENROUTER_BASE_URL # subscription flag → LaunchDarkly COPILOT_SDK → config.use_claude_agent_sdk. CopilotMode = Literal["fast", "extended_thinking"] +# Per-request model tier set by the frontend model toggle. +# 'standard' uses the global config default (currently Sonnet). +# 'advanced' forces the highest-capability model (currently Opus). +# None means no preference — falls through to LD per-user targeting, then config. +# Using tier names instead of model names keeps the contract model-agnostic. +CopilotLlmModel = Literal["standard", "advanced"] + class ChatConfig(BaseSettings): """Configuration for the chat system.""" @@ -163,12 +170,12 @@ class ChatConfig(BaseSettings): "CHAT_CLAUDE_AGENT_MAX_TURNS env var if your workflows need more.", ) claude_agent_max_budget_usd: float = Field( - default=15.0, + default=10.0, ge=0.01, le=1000.0, description="Maximum spend in USD per SDK query. The CLI attempts " "to wrap up gracefully when this budget is reached. " - "Set to $15 to allow most tasks to complete (p50=$5.37, p75=$13.07). " + "Set to $10 to allow most tasks to complete (p50=$5.37, p75=$13.07). " "Override via CHAT_CLAUDE_AGENT_MAX_BUDGET_USD env var.", ) claude_agent_max_thinking_tokens: int = Field( diff --git a/autogpt_platform/backend/backend/copilot/executor/processor.py b/autogpt_platform/backend/backend/copilot/executor/processor.py index cc83b2dd99..0266e57806 100644 --- a/autogpt_platform/backend/backend/copilot/executor/processor.py +++ b/autogpt_platform/backend/backend/copilot/executor/processor.py @@ -351,6 +351,7 @@ class CoPilotProcessor: context=entry.context, file_ids=entry.file_ids, mode=effective_mode, + model=entry.model, ) async for chunk in stream_registry.stream_and_publish( session_id=entry.session_id, diff --git a/autogpt_platform/backend/backend/copilot/executor/utils.py b/autogpt_platform/backend/backend/copilot/executor/utils.py index 0f7d23d9ba..3256f94869 100644 --- a/autogpt_platform/backend/backend/copilot/executor/utils.py +++ b/autogpt_platform/backend/backend/copilot/executor/utils.py @@ -9,7 +9,7 @@ import logging from pydantic import BaseModel -from backend.copilot.config import CopilotMode +from backend.copilot.config import CopilotLlmModel, CopilotMode from backend.data.rabbitmq import Exchange, ExchangeType, Queue, RabbitMQConfig from backend.util.logging import TruncatedLogger, is_structured_logging_enabled @@ -160,6 +160,9 @@ class CoPilotExecutionEntry(BaseModel): mode: CopilotMode | None = None """Autopilot mode override: 'fast' or 'extended_thinking'. None = server default.""" + model: CopilotLlmModel | None = None + """Per-request model tier: 'standard' or 'advanced'. 
+
 
 class CancelCoPilotEvent(BaseModel):
     """Event to cancel a CoPilot operation."""
@@ -180,6 +183,7 @@ async def enqueue_copilot_turn(
     context: dict[str, str] | None = None,
     file_ids: list[str] | None = None,
     mode: CopilotMode | None = None,
+    model: CopilotLlmModel | None = None,
 ) -> None:
     """Enqueue a CoPilot task for processing by the executor service.
 
@@ -192,6 +196,7 @@ async def enqueue_copilot_turn(
         context: Optional context for the message (e.g., {url: str, content: str})
         file_ids: Optional workspace file IDs attached to the user's message
         mode: Autopilot mode override ('fast' or 'extended_thinking'). None = server default.
+        model: Per-request model tier ('standard' or 'advanced'). None = server default.
     """
     from backend.util.clients import get_async_copilot_queue
 
@@ -204,6 +209,7 @@ async def enqueue_copilot_turn(
         context=context,
         file_ids=file_ids,
         mode=mode,
+        model=model,
     )
 
     queue_client = await get_async_copilot_queue()
diff --git a/autogpt_platform/backend/backend/copilot/rate_limit.py b/autogpt_platform/backend/backend/copilot/rate_limit.py
index f72d36de23..3124c28992 100644
--- a/autogpt_platform/backend/backend/copilot/rate_limit.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit.py
@@ -302,6 +302,7 @@ async def record_token_usage(
     *,
     cache_read_tokens: int = 0,
     cache_creation_tokens: int = 0,
+    model_cost_multiplier: float = 1.0,
 ) -> None:
     """Record token usage for a user across all windows.
 
@@ -315,12 +316,17 @@ async def record_token_usage(
     ``prompt_tokens`` should be the *uncached* input count (``input_tokens``
     from the API response). Cache counts are passed separately.
 
+    ``model_cost_multiplier`` scales the final weighted total to reflect
+    relative model cost. Use 5.0 for Opus (5× more expensive than Sonnet)
+    so that Opus turns deplete the rate limit faster, proportional to cost.
+
     Args:
         user_id: The user's ID.
         prompt_tokens: Uncached input tokens.
         completion_tokens: Output tokens.
         cache_read_tokens: Tokens served from prompt cache (10% cost).
         cache_creation_tokens: Tokens written to prompt cache (25% cost).
+        model_cost_multiplier: Relative model cost factor (1.0 = Sonnet, 5.0 = Opus).
""" prompt_tokens = max(0, prompt_tokens) completion_tokens = max(0, completion_tokens) @@ -332,7 +338,9 @@ async def record_token_usage( + round(cache_creation_tokens * 0.25) + round(cache_read_tokens * 0.1) ) - total = weighted_input + completion_tokens + total = round( + (weighted_input + completion_tokens) * max(1.0, model_cost_multiplier) + ) if total <= 0: return @@ -340,11 +348,12 @@ async def record_token_usage( prompt_tokens + cache_read_tokens + cache_creation_tokens + completion_tokens ) logger.info( - "Recording token usage for %s: raw=%d, weighted=%d " + "Recording token usage for %s: raw=%d, weighted=%d, multiplier=%.1fx " "(uncached=%d, cache_read=%d@10%%, cache_create=%d@25%%, output=%d)", user_id[:8], raw_total, total, + model_cost_multiplier, prompt_tokens, cache_read_tokens, cache_creation_tokens, diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index c018d9bffa..669ca286b9 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -60,7 +60,7 @@ from backend.util.feature_flag import ( ) from backend.util.settings import Settings -from ..config import ChatConfig, CopilotMode +from ..config import ChatConfig, CopilotLlmModel, CopilotMode from ..constants import ( COPILOT_ERROR_PREFIX, COPILOT_RETRYABLE_ERROR_PREFIX, @@ -1958,6 +1958,7 @@ async def stream_chat_completion_sdk( file_ids: list[str] | None = None, permissions: "CopilotPermissions | None" = None, mode: CopilotMode | None = None, + model: CopilotLlmModel | None = None, **_kwargs: Any, ) -> AsyncIterator[StreamBaseResponse]: """Stream chat completion using Claude Agent SDK. @@ -1968,6 +1969,9 @@ async def stream_chat_completion_sdk( saved to the SDK working directory for the Read tool. mode: Accepted for signature compatibility with the baseline path. The SDK path does not currently branch on this value. + model: Per-request model preference from the frontend toggle. + 'opus' → Claude Opus; 'sonnet' → global config default. + Takes priority over per-user LaunchDarkly targeting. """ _ = mode # SDK path ignores the requested mode. @@ -2298,6 +2302,36 @@ async def stream_chat_completion_sdk( ) sdk_model = user_model_override + # Explicit per-request model tier from frontend toggle — highest priority, + # overrides both global config and per-user LD targeting. + # 'advanced' → claude-opus-4-6 (highest capability today). + # 'standard' → config.model (current Sonnet default). + # Rate-limit multiplier (5×) ensures Opus turns deplete quota faster — + # no separate entitlement gate needed; users self-limit via rate limiting. + if model == "advanced": + sdk_model = _normalize_model_name("anthropic/claude-opus-4-6") + logger.info( + "[SDK] [%s] Per-request model override: advanced (%s)", + session_id[:12] if session_id else "?", + sdk_model, + ) + elif model == "standard": + sdk_model = _normalize_model_name(config.model) + logger.info( + "[SDK] [%s] Per-request model override: standard (%s)", + session_id[:12] if session_id else "?", + sdk_model, + ) + + # Compute rate-limit cost multiplier based on the final model. + # Opus costs 5× more than Sonnet (Anthropic pricing: $15/$75 vs $3/$15 per M tokens). + # This multiplier scales the token counter in record_token_usage so that + # Opus turns deplete the rate limit proportionally faster. 
+    _OPUS_COST_MULTIPLIER = 5.0
+    model_cost_multiplier = (
+        _OPUS_COST_MULTIPLIER if sdk_model and "opus" in sdk_model else 1.0
+    )
+
     # Track SDK-internal compaction (PreCompact hook → start, next msg → end)
     compaction = CompactionTracker()
 
@@ -2944,6 +2978,7 @@ async def stream_chat_completion_sdk(
                 cost_usd=turn_cost_usd,
                 model=config.model,
                 provider="anthropic",
+                model_cost_multiplier=model_cost_multiplier,
             )
 
             # --- Persist session messages ---
diff --git a/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py b/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py
index 8aa3ce4d44..3cfb44bab2 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py
@@ -19,6 +19,7 @@ from .service import (
     _is_prompt_too_long,
     _is_tool_only_message,
     _iter_sdk_messages,
+    _normalize_model_name,
     _reduce_context,
     _resolve_user_model_override,
 )
@@ -427,3 +428,44 @@ class TestResolveUserModelOverride:
         with patch("backend.copilot.sdk.service.get_feature_flag_value", new=ld_mock):
             await _resolve_user_model_override("user-abc")
         ld_mock.assert_called_once_with("copilot-model", "user-abc", default=None)
+
+
+# ---------------------------------------------------------------------------
+# _normalize_model_name — used by per-request model override
+# ---------------------------------------------------------------------------
+
+
+class TestNormalizeModelName:
+    """Unit tests for the model-name normalisation helper.
+
+    The per-request model toggle calls _normalize_model_name with either
+    ``"anthropic/claude-opus-4-6"`` (for 'advanced') or ``config.model`` (for
+    'standard'). These tests verify the OpenRouter/provider-prefix stripping
+    that keeps the value compatible with the Claude CLI.
+    """
+
+    def test_strips_anthropic_prefix(self):
+        assert _normalize_model_name("anthropic/claude-opus-4-6") == "claude-opus-4-6"
+
+    def test_strips_openai_prefix(self):
+        assert _normalize_model_name("openai/gpt-4o") == "gpt-4o"
+
+    def test_strips_google_prefix(self):
+        assert _normalize_model_name("google/gemini-2.5-flash") == "gemini-2.5-flash"
+
+    def test_already_normalized_unchanged(self):
+        assert (
+            _normalize_model_name("claude-sonnet-4-20250514")
+            == "claude-sonnet-4-20250514"
+        )
+
+    def test_empty_string_unchanged(self):
+        assert _normalize_model_name("") == ""
+
+    def test_opus_model_roundtrip(self):
+        """The exact string used for the 'advanced' toggle strips correctly."""
+        assert _normalize_model_name("anthropic/claude-opus-4-6") == "claude-opus-4-6"
+
+    def test_sonnet_openrouter_model(self):
+        """Sonnet model as stored in config (OpenRouter-prefixed) strips cleanly."""
+        assert _normalize_model_name("anthropic/claude-sonnet-4") == "claude-sonnet-4"
diff --git a/autogpt_platform/backend/backend/copilot/token_tracking.py b/autogpt_platform/backend/backend/copilot/token_tracking.py
index e84b64d449..19406ced93 100644
--- a/autogpt_platform/backend/backend/copilot/token_tracking.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking.py
@@ -96,6 +96,7 @@ async def persist_and_record_usage(
     cost_usd: float | str | None = None,
     model: str | None = None,
     provider: str = "open_router",
+    model_cost_multiplier: float = 1.0,
 ) -> int:
     """Persist token usage to session and record for rate limiting.
 
@@ -109,6 +110,9 @@ async def persist_and_record_usage(
         log_prefix: Prefix for log messages (e.g. "[SDK]", "[Baseline]").
         cost_usd: Optional cost for logging (float from SDK, str otherwise).
         provider: Cost provider name (e.g. "anthropic", "open_router").
+        model_cost_multiplier: Relative model cost factor for rate limiting
+            (1.0 = Sonnet/default, 5.0 = Opus). Scales the token counter so
+            more expensive models deplete the rate limit proportionally faster.
 
     Returns:
         The computed total_tokens (prompt + completion; cache excluded).
@@ -163,6 +167,7 @@ async def persist_and_record_usage(
             completion_tokens=completion_tokens,
             cache_read_tokens=cache_read_tokens,
             cache_creation_tokens=cache_creation_tokens,
+            model_cost_multiplier=model_cost_multiplier,
         )
     except Exception as usage_err:
         logger.warning("%s Failed to record token usage: %s", log_prefix, usage_err)
diff --git a/autogpt_platform/backend/backend/copilot/token_tracking_test.py b/autogpt_platform/backend/backend/copilot/token_tracking_test.py
index 04c7667368..11757ce541 100644
--- a/autogpt_platform/backend/backend/copilot/token_tracking_test.py
+++ b/autogpt_platform/backend/backend/copilot/token_tracking_test.py
@@ -230,6 +230,7 @@ class TestRateLimitRecording:
             completion_tokens=50,
             cache_read_tokens=1000,
             cache_creation_tokens=200,
+            model_cost_multiplier=1.0,
         )
 
     @pytest.mark.asyncio
diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/ChatInput.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/ChatInput.tsx
index d1e1ca4f9d..9dc10f2ed1 100644
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/ChatInput.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/ChatInput.tsx
@@ -13,6 +13,7 @@ import { ChangeEvent, useEffect, useState } from "react";
 import { AttachmentMenu } from "./components/AttachmentMenu";
 import { DryRunToggleButton } from "./components/DryRunToggleButton";
 import { FileChips } from "./components/FileChips";
+import { ModelToggleButton } from "./components/ModelToggleButton";
 import { ModeToggleButton } from "./components/ModeToggleButton";
 import { RecordingButton } from "./components/RecordingButton";
 import { RecordingIndicator } from "./components/RecordingIndicator";
@@ -50,8 +51,14 @@ export function ChatInput({
   onDroppedFilesConsumed,
   hasSession = false,
 }: Props) {
-  const { copilotMode, setCopilotMode, isDryRun, setIsDryRun } =
-    useCopilotUIStore();
+  const {
+    copilotMode,
+    setCopilotMode,
+    copilotModel,
+    setCopilotLlmModel,
+    isDryRun,
+    setIsDryRun,
+  } = useCopilotUIStore();
   const showModeToggle = useGetFlag(Flag.CHAT_MODE_OPTION);
   const showDryRunToggle = showModeToggle;
   const [files, setFiles] = useState([]);
@@ -72,6 +79,21 @@ export function ChatInput({
     });
   }
 
+  function handleToggleModel() {
+    const next = copilotModel === "advanced" ? "standard" : "advanced";
+    setCopilotLlmModel(next);
+    toast({
+      title:
+        next === "advanced"
+          ? "Switched to Advanced model"
+          : "Switched to Standard model",
+      description:
+        next === "advanced"
+          ? "Using the highest-capability model."
+ : "Using the balanced standard model.", + }); + } + function handleToggleDryRun() { const next = !isDryRun; setIsDryRun(next); @@ -202,6 +224,12 @@ export function ChatInput({ onToggle={handleToggleMode} /> )} + {showModeToggle && !isStreaming && ( + + )} {showDryRunToggle && (!hasSession || isDryRun) && ( void; +} + +export function ModelToggleButton({ model, onToggle }: Props) { + const isAdvanced = model === "advanced"; + return ( + + ); +} diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/components/__tests__/ModelToggleButton.test.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/components/__tests__/ModelToggleButton.test.tsx new file mode 100644 index 0000000000..a77cb5b6f4 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatInput/components/__tests__/ModelToggleButton.test.tsx @@ -0,0 +1,36 @@ +import { render, screen, fireEvent, cleanup } from "@testing-library/react"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { ModelToggleButton } from "../ModelToggleButton"; + +afterEach(cleanup); + +describe("ModelToggleButton", () => { + it("shows no label when model is standard", () => { + render(); + expect(screen.queryByText("Advanced")).toBeNull(); + }); + + it("shows Advanced label when model is advanced", () => { + render(); + expect(screen.getByText("Advanced")).toBeTruthy(); + }); + + it("calls onToggle when clicked", () => { + const onToggle = vi.fn(); + render(); + fireEvent.click(screen.getByRole("button")); + expect(onToggle).toHaveBeenCalledTimes(1); + }); + + it("sets aria-pressed=false for standard", () => { + render(); + const btn = screen.getByLabelText("Switch to Advanced model"); + expect(btn.getAttribute("aria-pressed")).toBe("false"); + }); + + it("sets aria-pressed=true for advanced", () => { + render(); + const btn = screen.getByLabelText("Switch to Standard model"); + expect(btn.getAttribute("aria-pressed")).toBe("true"); + }); +}); diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/store.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/store.ts index ebd9c3811f..bab24d38ce 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/store.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/store.ts @@ -52,6 +52,9 @@ export const DEFAULT_PANEL_WIDTH = 600; /** Autopilot response mode. */ export type CopilotMode = "extended_thinking" | "fast"; +/** Per-request model tier. 'standard' = current default; 'advanced' = highest-capability. */ +export type CopilotLlmModel = "standard" | "advanced"; + const isClient = typeof window !== "undefined"; function getPersistedWidth(): number { @@ -131,6 +134,10 @@ interface CopilotUIState { copilotMode: CopilotMode; setCopilotMode: (mode: CopilotMode) => void; + /** Model tier: 'standard' (default) or 'advanced' (highest-capability). */ + copilotModel: CopilotLlmModel; + setCopilotLlmModel: (model: CopilotLlmModel) => void; + /** Developer dry-run mode: sessions created with dry_run=true. */ isDryRun: boolean; setIsDryRun: (enabled: boolean) => void; @@ -280,6 +287,15 @@ export const useCopilotUIStore = create((set) => ({ set({ copilotMode: mode }); }, + copilotModel: (() => { + const saved = isClient ? storage.get(Key.COPILOT_MODEL) : null; + return saved === "advanced" ? 
"advanced" : "standard"; + })(), + setCopilotLlmModel: (model) => { + storage.set(Key.COPILOT_MODEL, model); + set({ copilotModel: model }); + }, + isDryRun: isClient && storage.get(Key.COPILOT_DRY_RUN) === "true", setIsDryRun: (enabled) => { if (enabled) { @@ -299,6 +315,7 @@ export const useCopilotUIStore = create((set) => ({ storage.clean(Key.COPILOT_ARTIFACT_PANEL_WIDTH); storage.clean(Key.COPILOT_COMPLETED_SESSIONS); storage.clean(Key.COPILOT_DRY_RUN); + storage.clean(Key.COPILOT_MODEL); set({ completedSessionIDs: new Set(), isNotificationsEnabled: false, @@ -312,6 +329,7 @@ export const useCopilotUIStore = create((set) => ({ history: [], }, copilotMode: "extended_thinking", + copilotModel: "standard", isDryRun: false, }); if (isClient) { diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts index f8b0387c6b..a0ae29d92f 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts @@ -43,6 +43,7 @@ export function useCopilotPage() { isDrawerOpen, setDrawerOpen, copilotMode, + copilotModel, isDryRun, } = useCopilotUIStore(); @@ -79,6 +80,7 @@ export function useCopilotPage() { hasActiveStream, refetchSession, copilotMode: isModeToggleEnabled ? copilotMode : undefined, + copilotModel: isModeToggleEnabled ? copilotModel : undefined, }); const { olderMessages, hasMore, isLoadingMore, loadMore } = diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts index 918047d3d8..14ea672bfb 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotStream.ts @@ -18,7 +18,7 @@ import { resolveInProgressTools, getSendSuppressionReason, } from "./helpers"; -import type { CopilotMode } from "./store"; +import type { CopilotLlmModel, CopilotMode } from "./store"; const RECONNECT_BASE_DELAY_MS = 1_000; const RECONNECT_MAX_ATTEMPTS = 3; @@ -33,6 +33,8 @@ interface UseCopilotStreamArgs { refetchSession: () => Promise<{ data?: unknown }>; /** Autopilot mode to use for requests. `undefined` = let backend decide via feature flags. */ copilotMode: CopilotMode | undefined; + /** Model tier override. `undefined` = let backend decide. */ + copilotModel: CopilotLlmModel | undefined; } export function useCopilotStream({ @@ -41,17 +43,20 @@ export function useCopilotStream({ hasActiveStream, refetchSession, copilotMode, + copilotModel, }: UseCopilotStreamArgs) { const queryClient = useQueryClient(); const [rateLimitMessage, setRateLimitMessage] = useState(null); function dismissRateLimit() { setRateLimitMessage(null); } - // Use a ref for copilotMode so the transport closure always reads the - // latest value without recreating the DefaultChatTransport (which would + // Use refs for copilotMode and copilotModel so the transport closure always reads + // the latest value without recreating the DefaultChatTransport (which would // reset useChat's internal Chat instance and break mid-session streaming). const copilotModeRef = useRef(copilotMode); copilotModeRef.current = copilotMode; + const copilotModelRef = useRef(copilotModel); + copilotModelRef.current = copilotModel; // Connect directly to the Python backend for SSE, bypassing the Next.js // serverless proxy. 
@@ -83,6 +88,7 @@ export function useCopilotStream({
           context: null,
           file_ids: fileIds && fileIds.length > 0 ? fileIds : null,
           mode: copilotModeRef.current ?? null,
+          model: copilotModelRef.current ?? null,
         },
         headers: await getCopilotAuthHeaders(),
       };
diff --git a/autogpt_platform/frontend/src/services/storage/local-storage.ts b/autogpt_platform/frontend/src/services/storage/local-storage.ts
index de31967d53..b5c0392ecd 100644
--- a/autogpt_platform/frontend/src/services/storage/local-storage.ts
+++ b/autogpt_platform/frontend/src/services/storage/local-storage.ts
@@ -17,6 +17,7 @@ export enum Key {
   COPILOT_NOTIFICATION_DIALOG_DISMISSED = "copilot-notification-dialog-dismissed",
   COPILOT_ARTIFACT_PANEL_WIDTH = "copilot-artifact-panel-width",
   COPILOT_MODE = "copilot-mode",
+  COPILOT_MODEL = "copilot-model",
   COPILOT_COMPLETED_SESSIONS = "copilot-completed-sessions",
   COPILOT_DRY_RUN = "copilot-dry-run",
 }
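
Worked example of the rate-limit charge this patch introduces, as one
self-contained sketch (the cache weights and the clamped multiplier mirror
record_token_usage in rate_limit.py above; the helper name is illustrative
and is not part of the patch):

    def effective_rate_limit_tokens(
        prompt_tokens: int,
        completion_tokens: int,
        cache_read_tokens: int = 0,
        cache_creation_tokens: int = 0,
        model_cost_multiplier: float = 1.0,
    ) -> int:
        # Cache writes count at 25% and cache reads at 10% of uncached
        # input; the weighted total is then scaled by the model multiplier,
        # clamped to >= 1.0 so cheaper models never receive a discount.
        weighted_input = (
            prompt_tokens
            + round(cache_creation_tokens * 0.25)
            + round(cache_read_tokens * 0.1)
        )
        return round(
            (weighted_input + completion_tokens) * max(1.0, model_cost_multiplier)
        )

    # A 10,000-token uncached prompt with 2,000 output tokens charges
    # 12,000 tokens on Sonnet (1.0x) but 60,000 on Opus (5.0x), so Opus
    # turns exhaust the same rate-limit windows five times sooner.
    assert effective_rate_limit_tokens(10_000, 2_000) == 12_000
    assert effective_rate_limit_tokens(10_000, 2_000, model_cost_multiplier=5.0) == 60_000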