Compare commits

...

1 Commits

Author SHA1 Message Date
Vikhyath Mondreti
dbe53f406b fix(max-tokens): anthropic models streaming vs non-streaming 2026-01-25 14:28:09 -08:00
6 changed files with 79 additions and 4 deletions

View File

@@ -513,6 +513,12 @@ Return ONLY the JSON array.`,
       })(),
     }),
   },
+  {
+    id: 'maxTokens',
+    title: 'Max Output Tokens',
+    type: 'short-input',
+    placeholder: 'Enter max tokens (e.g., 4096)...',
+  },
   {
     id: 'responseFormat',
     title: 'Response Format',
@@ -754,6 +760,7 @@ Example 3 (Array Input):
     },
   },
   temperature: { type: 'number', description: 'Response randomness level' },
+  maxTokens: { type: 'number', description: 'Maximum number of tokens in the response' },
   reasoningEffort: { type: 'string', description: 'Reasoning effort level for GPT-5 models' },
   verbosity: { type: 'string', description: 'Verbosity level for GPT-5 models' },
   thinkingLevel: { type: 'string', description: 'Thinking level for Gemini 3 models' },
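
The first file adds the user-facing piece: a "Max Output Tokens" short-input sub-block plus a matching `maxTokens` entry in the block's output schema. A minimal sketch of how a block could forward that input to a provider request is below; the `BlockInputs`/`ProviderRequest` shapes and the `buildProviderRequest` helper are illustrative assumptions, not part of this commit.

```ts
// Hypothetical sketch: forwarding the block's maxTokens input to a provider request.
// Only the `maxTokens` field itself comes from this commit; the surrounding
// types and helper are assumptions for illustration.
interface BlockInputs {
  model: string
  temperature?: number
  maxTokens?: string // short-input values typically arrive as strings
}

interface ProviderRequest {
  model: string
  temperature?: number
  maxTokens?: number
  stream?: boolean
}

function buildProviderRequest(inputs: BlockInputs, stream = false): ProviderRequest {
  const parsed = Number.parseInt(String(inputs.maxTokens), 10)
  return {
    model: inputs.model,
    temperature: inputs.temperature,
    // Leave maxTokens undefined when the field is blank so the provider
    // falls back to its per-model lookup.
    maxTokens: Number.isNaN(parsed) ? undefined : parsed,
    stream,
  }
}
```

Leaving `maxTokens` undefined (rather than 0) matters because the providers below treat any falsy value as "use the per-model default".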

View File

@@ -9,6 +9,7 @@ import {
   generateToolUseId,
 } from '@/providers/anthropic/utils'
 import {
+  getMaxOutputTokensForModel,
   getProviderDefaultModel,
   getProviderModels,
   supportsNativeStructuredOutputs,
@@ -178,7 +179,9 @@ export const anthropicProvider: ProviderConfig = {
       model: request.model,
       messages,
       system: systemPrompt,
-      max_tokens: Number.parseInt(String(request.maxTokens)) || 1024,
+      max_tokens:
+        Number.parseInt(String(request.maxTokens)) ||
+        getMaxOutputTokensForModel(request.model, request.stream ?? false),
       temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
     }
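
On the Anthropic side, the hard-coded fallback of 1024 output tokens is replaced by a per-model lookup, and that lookup is told whether the request streams. A small restatement of the fallback logic, assuming the names from the diff; the standalone helper and the placeholder model id are for illustration only.

```ts
import { getMaxOutputTokensForModel } from '@/providers/models'

// Restates the fallback from the diff. Number.parseInt(String(undefined)) is NaN,
// which is falsy, so an empty maxTokens falls through to the per-model lookup.
// An explicit 0 would also fall through, since 0 is falsy as well.
function resolveMaxTokens(
  maxTokens: number | string | undefined,
  model: string,
  stream: boolean | undefined
): number {
  return (
    Number.parseInt(String(maxTokens)) ||
    getMaxOutputTokensForModel(model, stream ?? false)
  )
}

// Placeholder model id, purely illustrative:
// resolveMaxTokens(undefined, 'some-anthropic-model', true)  -> that model's streaming max
// resolveMaxTokens(undefined, 'some-anthropic-model', false) -> that model's safe default
// resolveMaxTokens('2048', 'some-anthropic-model', true)     -> 2048
```

The Bedrock provider in the next file applies the same pattern to its `inferenceConfig.maxTokens`.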

View File

@@ -20,7 +20,11 @@ import {
   generateToolUseId,
   getBedrockInferenceProfileId,
 } from '@/providers/bedrock/utils'
-import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
+import {
+  getMaxOutputTokensForModel,
+  getProviderDefaultModel,
+  getProviderModels,
+} from '@/providers/models'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -259,7 +263,9 @@ export const bedrockProvider: ProviderConfig = {
     const inferenceConfig = {
       temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
-      maxTokens: Number.parseInt(String(request.maxTokens)) || 4096,
+      maxTokens:
+        Number.parseInt(String(request.maxTokens)) ||
+        getMaxOutputTokensForModel(request.model, request.stream ?? false),
     }
     const shouldStreamToolCalls = request.streamToolCalls ?? false

View File

@@ -34,6 +34,12 @@ export interface ModelCapabilities {
   toolUsageControl?: boolean
   computerUse?: boolean
   nativeStructuredOutputs?: boolean
+  maxOutputTokens?: {
+    /** Maximum tokens for streaming requests */
+    max: number
+    /** Safe default for non-streaming requests (to avoid timeout issues) */
+    default: number
+  }
   reasoningEffort?: {
     values: string[]
   }
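
The new optional `maxOutputTokens` capability separates a model's hard output ceiling (`max`, used when streaming) from a smaller `default` applied to non-streaming calls, where long generations are more likely to hit request timeouts. A hypothetical model definition entry using this shape; the id is a placeholder, and the limits mirror the Claude entries added in the hunks below.

```ts
// Hypothetical model definition entry using the new capability shape.
const exampleModel = {
  id: 'example-model', // placeholder id, not from the diff
  capabilities: {
    temperature: { min: 0, max: 1 },
    nativeStructuredOutputs: true,
    maxOutputTokens: {
      max: 64000, // ceiling used for streaming requests
      default: 4096, // conservative cap used for non-streaming requests
    },
  },
  contextWindow: 200000,
}
```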
@@ -613,6 +619,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -627,6 +634,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -640,6 +648,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       },
       capabilities: {
         temperature: { min: 0, max: 1 },
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -654,6 +663,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -668,6 +678,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -681,6 +692,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       },
       capabilities: {
         temperature: { min: 0, max: 1 },
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -695,6 +707,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         computerUse: true,
+        maxOutputTokens: { max: 8192, default: 8192 },
       },
       contextWindow: 200000,
     },
@@ -709,6 +722,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         computerUse: true,
+        maxOutputTokens: { max: 8192, default: 8192 },
       },
       contextWindow: 200000,
     },
@@ -1655,6 +1669,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -1668,6 +1683,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -1681,6 +1697,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -1694,6 +1711,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
       },
       contextWindow: 200000,
     },
@@ -2333,3 +2351,31 @@ export function getThinkingLevelsForModel(modelId: string): string[] | null {
   const capability = getThinkingCapability(modelId)
   return capability?.levels ?? null
 }
+
+/**
+ * Get the max output tokens for a specific model
+ * Returns the model's max capacity for streaming requests,
+ * or the model's safe default for non-streaming requests to avoid timeout issues.
+ *
+ * @param modelId - The model ID
+ * @param streaming - Whether the request is streaming (default: false)
+ */
+export function getMaxOutputTokensForModel(modelId: string, streaming = false): number {
+  const normalizedModelId = modelId.toLowerCase()
+  const STANDARD_MAX_OUTPUT_TOKENS = 4096
+
+  for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
+    for (const model of provider.models) {
+      const baseModelId = model.id.toLowerCase()
+      if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
+        const outputTokens = model.capabilities.maxOutputTokens
+        if (outputTokens) {
+          return streaming ? outputTokens.max : outputTokens.default
+        }
+        return STANDARD_MAX_OUTPUT_TOKENS
+      }
+    }
+  }
+
+  return STANDARD_MAX_OUTPUT_TOKENS
+}
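
The lookup lowercases the requested id and matches either an exact definition id or a suffixed variant such as a dated snapshot (`<id>-...`); models without a `maxOutputTokens` capability fall back to 4096. Expected behavior, assuming a placeholder model defined with `maxOutputTokens: { max: 64000, default: 4096 }`:

```ts
// 'some-model' is a placeholder id, not an entry from the diff.
getMaxOutputTokensForModel('some-model', true) // 64000 (streaming max)
getMaxOutputTokensForModel('some-model', false) // 4096 (non-streaming default)
getMaxOutputTokensForModel('some-model-20250101', true) // 64000 (suffixed variant matches via startsWith)
getMaxOutputTokensForModel('unknown-model') // 4096 (STANDARD_MAX_OUTPUT_TOKENS fallback)
```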

View File

@@ -8,6 +8,7 @@ import {
   getComputerUseModels,
   getEmbeddingModelPricing,
   getHostedModels as getHostedModelsFromDefinitions,
+  getMaxOutputTokensForModel as getMaxOutputTokensForModelFromDefinitions,
   getMaxTemperature as getMaxTempFromDefinitions,
   getModelPricing as getModelPricingFromDefinitions,
   getModelsWithReasoningEffort,
@@ -992,6 +993,18 @@ export function getThinkingLevelsForModel(model: string): string[] | null {
   return getThinkingLevelsForModelFromDefinitions(model)
 }
+
+/**
+ * Get max output tokens for a specific model
+ * Returns the model's maxOutputTokens capability for streaming requests,
+ * or the model's safe default for non-streaming requests to avoid timeout issues.
+ *
+ * @param model - The model ID
+ * @param streaming - Whether the request is streaming (default: false)
+ */
+export function getMaxOutputTokensForModel(model: string, streaming = false): number {
+  return getMaxOutputTokensForModelFromDefinitions(model, streaming)
+}
 /**
  * Prepare tool execution parameters, separating tool parameters from system parameters
  */

View File

@@ -5,7 +5,7 @@ import type { ToolConfig, ToolResponse } from '@/tools/types'
 const logger = createLogger('BrowserUseTool')
 const POLL_INTERVAL_MS = 5000
-const MAX_POLL_TIME_MS = 180000
+const MAX_POLL_TIME_MS = 600000 // 10 minutes
 const MAX_CONSECUTIVE_ERRORS = 3
 async function createSessionWithProfile(
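
The final file only raises the BrowserUse polling budget from 180000 ms (3 minutes) to 600000 ms (10 minutes). For context, a hedged sketch of the kind of polling loop these constants typically drive; the `TaskStatus` shape and the `fetchTaskStatus` parameter are assumptions for illustration, not taken from the tool's actual implementation.

```ts
// Constants as set by this commit.
const POLL_INTERVAL_MS = 5000
const MAX_POLL_TIME_MS = 600000 // 10 minutes
const MAX_CONSECUTIVE_ERRORS = 3

// Assumed status shape, purely illustrative.
type TaskStatus = { state: 'running' | 'finished' | 'failed'; output?: unknown }

async function pollUntilDone(
  taskId: string,
  fetchTaskStatus: (id: string) => Promise<TaskStatus> // assumed helper, injected here
): Promise<unknown> {
  const deadline = Date.now() + MAX_POLL_TIME_MS
  let consecutiveErrors = 0

  while (Date.now() < deadline) {
    let status: TaskStatus
    try {
      status = await fetchTaskStatus(taskId)
      consecutiveErrors = 0
    } catch (error) {
      // Tolerate transient failures up to MAX_CONSECUTIVE_ERRORS in a row.
      consecutiveErrors += 1
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) throw error
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
      continue
    }
    if (status.state === 'finished') return status.output
    if (status.state === 'failed') throw new Error('BrowserUse task failed')
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
  }
  throw new Error('BrowserUse task did not finish within MAX_POLL_TIME_MS')
}
```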