Mirror of https://github.com/simstudioai/sim.git (synced 2026-01-26 07:18:38 -05:00)

Compare commits: fix/multi-... → fix/claude (1 commit, dbe53f406b)
@@ -513,6 +513,12 @@ Return ONLY the JSON array.`,
        })(),
      }),
    },
+    {
+      id: 'maxTokens',
+      title: 'Max Output Tokens',
+      type: 'short-input',
+      placeholder: 'Enter max tokens (e.g., 4096)...',
+    },
    {
      id: 'responseFormat',
      title: 'Response Format',
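For orientation, here is a hedged sketch of how the raw string typed into this 'Max Output Tokens' short-input could be normalized before it reaches a provider request. The helper name and wiring are assumptions for illustration, not part of this commit; the providers below simply run the value through `Number.parseInt` with a model-aware fallback.

```typescript
// Hypothetical helper (not in this commit): turn the short-input string into a
// positive integer, or undefined so the provider-side fallback can take over.
function parseMaxTokensInput(raw: string | undefined): number | undefined {
  if (raw === undefined || raw.trim() === '') return undefined
  const parsed = Number.parseInt(raw, 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined
}

console.log(parseMaxTokensInput('4096')) // 4096
console.log(parseMaxTokensInput('abc'))  // undefined
```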
@@ -754,6 +760,7 @@ Example 3 (Array Input):
      },
    },
    temperature: { type: 'number', description: 'Response randomness level' },
+    maxTokens: { type: 'number', description: 'Maximum number of tokens in the response' },
    reasoningEffort: { type: 'string', description: 'Reasoning effort level for GPT-5 models' },
    verbosity: { type: 'string', description: 'Verbosity level for GPT-5 models' },
    thinkingLevel: { type: 'string', description: 'Thinking level for Gemini 3 models' },
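To make the parameter surface concrete, an illustrative request fragment using the fields documented above; the concrete values, and whether every field applies to the same model family, are invented for the example.

```typescript
// Illustrative only: field names follow the schema above, values are invented.
const exampleRequest = {
  temperature: 0.7,          // Response randomness level
  maxTokens: 4096,           // Maximum number of tokens in the response (added in this commit)
  reasoningEffort: 'medium', // Reasoning effort level for GPT-5 models (assumed value)
  verbosity: 'low',          // Verbosity level for GPT-5 models (assumed value)
  thinkingLevel: 'high',     // Thinking level for Gemini 3 models (assumed value)
}
```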
@@ -9,6 +9,7 @@ import {
   generateToolUseId,
 } from '@/providers/anthropic/utils'
 import {
+  getMaxOutputTokensForModel,
   getProviderDefaultModel,
   getProviderModels,
   supportsNativeStructuredOutputs,
@@ -178,7 +179,9 @@ export const anthropicProvider: ProviderConfig = {
      model: request.model,
      messages,
      system: systemPrompt,
-      max_tokens: Number.parseInt(String(request.maxTokens)) || 1024,
+      max_tokens:
+        Number.parseInt(String(request.maxTokens)) ||
+        getMaxOutputTokensForModel(request.model, request.stream ?? false),
      temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
    }
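The new max_tokens expression leans on `||` short-circuiting: `Number.parseInt` returns NaN when `request.maxTokens` is missing or not numeric, and NaN (like 0) is falsy, so the model-aware limit from `getMaxOutputTokensForModel` is used instead. A standalone sketch of that behavior, with an example fallback value:

```typescript
// Standalone illustration of the fallback pattern used above. NaN (from a missing or
// non-numeric maxTokens) and 0 are both falsy, so `||` falls through to the model's
// limit; an explicit positive value wins.
function resolveMaxTokens(maxTokens: unknown, modelLimit: number): number {
  return Number.parseInt(String(maxTokens)) || modelLimit
}

console.log(resolveMaxTokens(2048, 64000))      // 2048  – explicit value is kept
console.log(resolveMaxTokens(undefined, 64000)) // 64000 – parseInt gives NaN, falls back
console.log(resolveMaxTokens('0', 64000))       // 64000 – note: 0 is falsy, also falls back
```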
@@ -20,7 +20,11 @@ import {
   generateToolUseId,
   getBedrockInferenceProfileId,
 } from '@/providers/bedrock/utils'
-import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
+import {
+  getMaxOutputTokensForModel,
+  getProviderDefaultModel,
+  getProviderModels,
+} from '@/providers/models'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -259,7 +263,9 @@ export const bedrockProvider: ProviderConfig = {

    const inferenceConfig = {
      temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
-      maxTokens: Number.parseInt(String(request.maxTokens)) || 4096,
+      maxTokens:
+        Number.parseInt(String(request.maxTokens)) ||
+        getMaxOutputTokensForModel(request.model, request.stream ?? false),
    }

    const shouldStreamToolCalls = request.streamToolCalls ?? false
@@ -34,6 +34,12 @@ export interface ModelCapabilities {
  toolUsageControl?: boolean
  computerUse?: boolean
  nativeStructuredOutputs?: boolean
+  maxOutputTokens?: {
+    /** Maximum tokens for streaming requests */
+    max: number
+    /** Safe default for non-streaming requests (to avoid timeout issues) */
+    default: number
+  }
  reasoningEffort?: {
    values: string[]
  }
@@ -613,6 +619,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -627,6 +634,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -640,6 +648,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      },
      capabilities: {
        temperature: { min: 0, max: 1 },
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -654,6 +663,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -668,6 +678,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -681,6 +692,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      },
      capabilities: {
        temperature: { min: 0, max: 1 },
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -695,6 +707,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        computerUse: true,
+        maxOutputTokens: { max: 8192, default: 8192 },
      },
      contextWindow: 200000,
    },
@@ -709,6 +722,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        computerUse: true,
+        maxOutputTokens: { max: 8192, default: 8192 },
      },
      contextWindow: 200000,
    },
@@ -1655,6 +1669,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -1668,6 +1683,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -1681,6 +1697,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -1694,6 +1711,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
      capabilities: {
        temperature: { min: 0, max: 1 },
        nativeStructuredOutputs: true,
+        maxOutputTokens: { max: 64000, default: 4096 },
      },
      contextWindow: 200000,
    },
@@ -2333,3 +2351,31 @@ export function getThinkingLevelsForModel(modelId: string): string[] | null {
  const capability = getThinkingCapability(modelId)
  return capability?.levels ?? null
}
+
+/**
+ * Get the max output tokens for a specific model
+ * Returns the model's max capacity for streaming requests,
+ * or the model's safe default for non-streaming requests to avoid timeout issues.
+ *
+ * @param modelId - The model ID
+ * @param streaming - Whether the request is streaming (default: false)
+ */
+export function getMaxOutputTokensForModel(modelId: string, streaming = false): number {
+  const normalizedModelId = modelId.toLowerCase()
+  const STANDARD_MAX_OUTPUT_TOKENS = 4096
+
+  for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
+    for (const model of provider.models) {
+      const baseModelId = model.id.toLowerCase()
+      if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
+        const outputTokens = model.capabilities.maxOutputTokens
+        if (outputTokens) {
+          return streaming ? outputTokens.max : outputTokens.default
+        }
+        return STANDARD_MAX_OUTPUT_TOKENS
+      }
+    }
+  }
+
+  return STANDARD_MAX_OUTPUT_TOKENS
+}
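A hedged usage sketch of the function above. The model ids are placeholders, and the limits shown in the comments only apply if the matched entry declares maxOutputTokens: { max: 64000, default: 4096 } as the Anthropic entries earlier in this diff do.

```typescript
// Assumes getMaxOutputTokensForModel from above is in scope. Model ids are placeholders.
const streamingLimit = getMaxOutputTokensForModel('some-claude-model', true) // capability.max, e.g. 64000
const blockingLimit = getMaxOutputTokensForModel('some-claude-model')        // capability.default, e.g. 4096
const unknownLimit = getMaxOutputTokensForModel('not-a-known-model')         // 4096 (STANDARD_MAX_OUTPUT_TOKENS)
// Dated variants such as 'some-claude-model-20250101' also match via startsWith(`${baseModelId}-`).
```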
@@ -8,6 +8,7 @@ import {
   getComputerUseModels,
   getEmbeddingModelPricing,
   getHostedModels as getHostedModelsFromDefinitions,
+  getMaxOutputTokensForModel as getMaxOutputTokensForModelFromDefinitions,
   getMaxTemperature as getMaxTempFromDefinitions,
   getModelPricing as getModelPricingFromDefinitions,
   getModelsWithReasoningEffort,
@@ -992,6 +993,18 @@ export function getThinkingLevelsForModel(model: string): string[] | null {
  return getThinkingLevelsForModelFromDefinitions(model)
}
+
+/**
+ * Get max output tokens for a specific model
+ * Returns the model's maxOutputTokens capability for streaming requests,
+ * or a conservative default (8192) for non-streaming requests to avoid timeout issues.
+ *
+ * @param model - The model ID
+ * @param streaming - Whether the request is streaming (default: false)
+ */
+export function getMaxOutputTokensForModel(model: string, streaming = false): number {
+  return getMaxOutputTokensForModelFromDefinitions(model, streaming)
+}

/**
 * Prepare tool execution parameters, separating tool parameters from system parameters
 */
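On the caller side, a simplified sketch of how a provider resolves its output-token budget through this wrapper, mirroring the Anthropic and Bedrock hunks above; the request interface here is trimmed down for illustration and is not the real ProviderRequest type.

```typescript
import { getMaxOutputTokensForModel } from '@/providers/models'

// Trimmed-down request shape for illustration; the real ProviderRequest has more fields.
interface SimplifiedRequest {
  model: string
  maxTokens?: number
  stream?: boolean
}

// An explicit maxTokens wins; otherwise fall back to the model-aware limit,
// which is larger for streaming requests than for blocking ones.
function resolveOutputTokens(request: SimplifiedRequest): number {
  return (
    Number.parseInt(String(request.maxTokens)) ||
    getMaxOutputTokensForModel(request.model, request.stream ?? false)
  )
}
```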
@@ -5,7 +5,7 @@ import type { ToolConfig, ToolResponse } from '@/tools/types'
 const logger = createLogger('BrowserUseTool')

 const POLL_INTERVAL_MS = 5000
-const MAX_POLL_TIME_MS = 180000
+const MAX_POLL_TIME_MS = 600000 // 10 minutes
 const MAX_CONSECUTIVE_ERRORS = 3

 async function createSessionWithProfile(
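With the new ceiling, the poll loop can run up to 600000 / 5000 = 120 iterations instead of 36. A generic sketch of a poll loop built on these constants; only the three constants come from the diff, while the checkTask callback and the control flow are illustrative rather than the tool's actual implementation.

```typescript
const POLL_INTERVAL_MS = 5000
const MAX_POLL_TIME_MS = 600000 // 10 minutes
const MAX_CONSECUTIVE_ERRORS = 3

// Generic polling sketch: stop on success, on too many consecutive errors, or when
// the overall deadline derived from MAX_POLL_TIME_MS is reached.
async function pollUntilDone(checkTask: () => Promise<'running' | 'done'>): Promise<boolean> {
  const deadline = Date.now() + MAX_POLL_TIME_MS
  let consecutiveErrors = 0
  while (Date.now() < deadline) {
    try {
      if ((await checkTask()) === 'done') return true
      consecutiveErrors = 0
    } catch {
      consecutiveErrors += 1
      if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) return false
    }
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
  }
  return false // timed out after MAX_POLL_TIME_MS
}
```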