improvement(providers): audit and update all provider model definitions (#3893)

* improvement(providers): audit and update all provider model definitions
* fix(providers): add maxOutputTokens to azure/o3 and azure/o4-mini
* fix(providers): move maxOutputTokens inside capabilities for azure models
2026-04-06 03:00:16 -04:00 · 2026-04-01 19:32:16 -07:00
parent 27a11a269d
commit 4c94f3cf78
3 changed files with 414 additions and 305 deletions
--- a/apps/sim/providers/anthropic/core.ts
+++ b/apps/sim/providers/anthropic/core.ts
@@ -77,17 +77,24 @@ const THINKING_BUDGET_TOKENS: Record<string, number> = {
 }

 /**
- * Checks if a model supports adaptive thinking (Opus 4.6+)
+ * Checks if a model supports adaptive thinking (thinking.type: "adaptive").
+ * Per the Anthropic API, only Opus 4.6 and Sonnet 4.6 support adaptive thinking.
+ * Opus 4.5 supports effort but NOT adaptive thinking — it uses budget_tokens with type: "enabled".
 */
 function supportsAdaptiveThinking(modelId: string): boolean {
  const normalizedModel = modelId.toLowerCase()
-  return normalizedModel.includes('opus-4-6') || normalizedModel.includes('opus-4.6')
+  return (
+    normalizedModel.includes('opus-4-6') ||
+    normalizedModel.includes('opus-4.6') ||
+    normalizedModel.includes('sonnet-4-6') ||
+    normalizedModel.includes('sonnet-4.6')
+  )
 }

 /**
 * Builds the thinking configuration for the Anthropic API based on model capabilities and level.
 *
- * - Opus 4.6: Uses adaptive thinking with effort parameter (recommended by Anthropic)
+ * - Opus 4.6, Sonnet 4.6: Uses adaptive thinking with effort parameter
 * - Other models: Uses budget_tokens-based extended thinking
 *
 * Returns both the thinking config and optional output_config for adaptive thinking.
@@ -104,7 +111,7 @@ function buildThinkingConfig(
    return null
  }

-  // Opus 4.6 uses adaptive thinking with effort parameter
+  // Models that support adaptive thinking (Opus 4.6, Sonnet 4.6) use it with the effort parameter; effort support alone (e.g. Opus 4.5) is not enough
  if (supportsAdaptiveThinking(modelId)) {
    return {
      thinking: { type: 'adaptive' },
--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
--- a/apps/sim/providers/utils.test.ts
+++ b/apps/sim/providers/utils.test.ts
@@ -392,7 +392,6 @@ describe('Model Capabilities', () => {
      expect(supportsThinking('claude-sonnet-4-5')).toBe(true)
      expect(supportsThinking('claude-sonnet-4-0')).toBe(true)
      expect(supportsThinking('claude-haiku-4-5')).toBe(true)
-      expect(supportsThinking('gemini-3-pro-preview')).toBe(true)
      expect(supportsThinking('gemini-3-flash-preview')).toBe(true)
    })

@@ -511,7 +510,6 @@ describe('Model Capabilities', () => {
      expect(MODELS_WITH_THINKING).toContain('claude-sonnet-4-5')
      expect(MODELS_WITH_THINKING).toContain('claude-sonnet-4-0')

-      expect(MODELS_WITH_THINKING).toContain('gemini-3-pro-preview')
      expect(MODELS_WITH_THINKING).toContain('gemini-3-flash-preview')

      expect(MODELS_WITH_THINKING).toContain('claude-haiku-4-5')
@@ -523,7 +521,12 @@ describe('Model Capabilities', () => {

    it.concurrent('should have GPT-5 models in both reasoning effort and verbosity arrays', () => {
      const gpt5ModelsWithReasoningEffort = MODELS_WITH_REASONING_EFFORT.filter(
-        (m) => m.includes('gpt-5') && !m.includes('chat-latest') && !m.includes('gpt-5.4-pro')
+        (m) =>
+          m.includes('gpt-5') &&
+          !m.includes('chat-latest') &&
+          !m.includes('gpt-5.4-pro') &&
+          !m.includes('gpt-5.2-pro') &&
+          !m.includes('gpt-5-pro')
      )
      const gpt5ModelsWithVerbosity = MODELS_WITH_VERBOSITY.filter(
        (m) => m.includes('gpt-5') && !m.includes('chat-latest')
@@ -533,6 +536,12 @@ describe('Model Capabilities', () => {
      expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.4-pro')
      expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.4-pro')

+      expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.2-pro')
+      expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5.2-pro')
+
+      expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5-pro')
+      expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5-pro')
+
      expect(MODELS_WITH_REASONING_EFFORT).toContain('o1')
      expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
    })
@@ -629,11 +638,6 @@ describe('Model Capabilities', () => {
    })

    it.concurrent('should return correct levels for Gemini 3 models', () => {
-      const proLevels = getThinkingLevelsForModel('gemini-3-pro-preview')
-      expect(proLevels).toBeDefined()
-      expect(proLevels).toContain('low')
-      expect(proLevels).toContain('high')
-
      const flashLevels = getThinkingLevelsForModel('gemini-3-flash-preview')
      expect(flashLevels).toBeDefined()
      expect(flashLevels).toContain('minimal')
@@ -669,7 +673,7 @@ describe('Max Output Tokens', () => {
    })

    it.concurrent('should return correct max for Claude Opus 4.1', () => {
-      expect(getMaxOutputTokensForModel('claude-opus-4-1')).toBe(64000)
+      expect(getMaxOutputTokensForModel('claude-opus-4-1')).toBe(32000)
    })

    it.concurrent('should return standard default for models without maxOutputTokens', () => {