feat(backend): Add GPT-5.2 and update default models (#11652)

### Changes 🏗️ - Add OpenAI `GPT-5.2` with metadata&cost - Add const `DEFAULT_LLM_MODEL` (set to GPT-5.2) and use it instead of hardcoded model across llm blocks and tests ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] GPT-5.2 is set as default and works on llm blocks
2026-04-08 03:00:28 -04:00 · 2026-01-06 01:13:35 +09:00
parent 266e0d79d4
commit ac2daee5f8
8 changed files with 51 additions and 45 deletions
--- a/autogpt_platform/backend/backend/api/rest_api.py
+++ b/autogpt_platform/backend/backend/api/rest_api.py
@@ -39,7 +39,7 @@ import backend.data.user
 import backend.integrations.webhooks.utils
 import backend.util.service
 import backend.util.settings
-from backend.blocks.llm import LlmModel
+from backend.blocks.llm import DEFAULT_LLM_MODEL
 from backend.data.model import Credentials
 from backend.integrations.providers import ProviderName
 from backend.monitoring.instrumentation import instrument_fastapi
@@ -113,7 +113,7 @@ async def lifespan_context(app: fastapi.FastAPI):

    await backend.data.user.migrate_and_encrypt_user_integrations()
    await backend.data.graph.fix_llm_provider_credentials()
-    await backend.data.graph.migrate_llm_models(LlmModel.GPT4O)
+    await backend.data.graph.migrate_llm_models(DEFAULT_LLM_MODEL)
    await backend.integrations.webhooks.utils.migrate_legacy_triggered_graphs()

    with launch_darkly_context():
--- a/autogpt_platform/backend/backend/blocks/ai_condition.py
+++ b/autogpt_platform/backend/backend/blocks/ai_condition.py
@@ -1,6 +1,7 @@
 from typing import Any

 from backend.blocks.llm import (
+    DEFAULT_LLM_MODEL,
    TEST_CREDENTIALS,
    TEST_CREDENTIALS_INPUT,
    AIBlockBase,
@@ -49,7 +50,7 @@ class AIConditionBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for evaluating the condition.",
            advanced=False,
        )
@@ -81,7 +82,7 @@ class AIConditionBlock(AIBlockBase):
                "condition": "the input is an email address",
                "yes_value": "Valid email",
                "no_value": "Not an email",
-                "model": LlmModel.GPT4O,
+                "model": DEFAULT_LLM_MODEL,
                "credentials": TEST_CREDENTIALS_INPUT,
            },
            test_credentials=TEST_CREDENTIALS,
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -92,8 +92,9 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
    O1 = "o1"
    O1_MINI = "o1-mini"
    # GPT-5 models
-    GPT5 = "gpt-5-2025-08-07"
+    GPT5_2 = "gpt-5.2-2025-12-11"
    GPT5_1 = "gpt-5.1-2025-11-13"
+    GPT5 = "gpt-5-2025-08-07"
    GPT5_MINI = "gpt-5-mini-2025-08-07"
    GPT5_NANO = "gpt-5-nano-2025-08-07"
    GPT5_CHAT = "gpt-5-chat-latest"
@@ -194,8 +195,9 @@ MODEL_METADATA = {
    LlmModel.O1: ModelMetadata("openai", 200000, 100000),  # o1-2024-12-17
    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
    # GPT-5 models
-    LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
+    LlmModel.GPT5_2: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
+    LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_MINI: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_NANO: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_CHAT: ModelMetadata("openai", 400000, 16384),
@@ -303,6 +305,8 @@ MODEL_METADATA = {
    LlmModel.V0_1_0_MD: ModelMetadata("v0", 128000, 64000),
 }

+DEFAULT_LLM_MODEL = LlmModel.GPT5_2
+
 for model in LlmModel:
    if model not in MODEL_METADATA:
        raise ValueError(f"Missing MODEL_METADATA metadata for model: {model}")
@@ -790,7 +794,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for answering the prompt.",
            advanced=False,
        )
@@ -855,7 +859,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
            input_schema=AIStructuredResponseGeneratorBlock.Input,
            output_schema=AIStructuredResponseGeneratorBlock.Output,
            test_input={
-                "model": LlmModel.GPT4O,
+                "model": DEFAULT_LLM_MODEL,
                "credentials": TEST_CREDENTIALS_INPUT,
                "expected_format": {
                    "key1": "value1",
@@ -1221,7 +1225,7 @@ class AITextGeneratorBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for answering the prompt.",
            advanced=False,
        )
@@ -1317,7 +1321,7 @@ class AITextSummarizerBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for summarizing the text.",
        )
        focus: str = SchemaField(
@@ -1534,7 +1538,7 @@ class AIConversationBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for the conversation.",
        )
        credentials: AICredentials = AICredentialsField()
@@ -1572,7 +1576,7 @@ class AIConversationBlock(AIBlockBase):
                    },
                    {"role": "user", "content": "Where was it played?"},
                ],
-                "model": LlmModel.GPT4O,
+                "model": DEFAULT_LLM_MODEL,
                "credentials": TEST_CREDENTIALS_INPUT,
            },
            test_credentials=TEST_CREDENTIALS,
@@ -1635,7 +1639,7 @@ class AIListGeneratorBlock(AIBlockBase):
        )
        model: LlmModel = SchemaField(
            title="LLM Model",
-            default=LlmModel.GPT4O,
+            default=DEFAULT_LLM_MODEL,
            description="The language model to use for generating the list.",
            advanced=True,
        )
@@ -1692,7 +1696,7 @@ class AIListGeneratorBlock(AIBlockBase):
                    "drawing explorers to uncover its mysteries. Each planet showcases the limitless possibilities of "
                    "fictional worlds."
                ),
-                "model": LlmModel.GPT4O,
+                "model": DEFAULT_LLM_MODEL,
                "credentials": TEST_CREDENTIALS_INPUT,
                "max_retries": 3,
                "force_json_output": False,
--- a/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
+++ b/autogpt_platform/backend/backend/blocks/smart_decision_maker.py
@@ -226,7 +226,7 @@ class SmartDecisionMakerBlock(Block):
        )
        model: llm.LlmModel = SchemaField(
            title="LLM Model",
-            default=llm.LlmModel.GPT4O,
+            default=llm.DEFAULT_LLM_MODEL,
            description="The language model to use for answering the prompt.",
            advanced=False,
        )
--- a/autogpt_platform/backend/backend/blocks/test/test_llm.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_llm.py
@@ -28,7 +28,7 @@ class TestLLMStatsTracking:

            response = await llm.llm_call(
                credentials=llm.TEST_CREDENTIALS,
-                llm_model=llm.LlmModel.GPT4O,
+                llm_model=llm.DEFAULT_LLM_MODEL,
                prompt=[{"role": "user", "content": "Hello"}],
                max_tokens=100,
            )
@@ -65,7 +65,7 @@ class TestLLMStatsTracking:
        input_data = llm.AIStructuredResponseGeneratorBlock.Input(
            prompt="Test prompt",
            expected_format={"key1": "desc1", "key2": "desc2"},
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore  # type: ignore
        )

@@ -109,7 +109,7 @@ class TestLLMStatsTracking:
        # Run the block
        input_data = llm.AITextGeneratorBlock.Input(
            prompt="Generate text",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

@@ -170,7 +170,7 @@ class TestLLMStatsTracking:
        input_data = llm.AIStructuredResponseGeneratorBlock.Input(
            prompt="Test prompt",
            expected_format={"key1": "desc1", "key2": "desc2"},
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            retry=2,
        )
@@ -228,7 +228,7 @@ class TestLLMStatsTracking:

        input_data = llm.AITextSummarizerBlock.Input(
            text=long_text,
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            max_tokens=100,  # Small chunks
            chunk_overlap=10,
@@ -299,7 +299,7 @@ class TestLLMStatsTracking:
            # Test with very short text (should only need 1 chunk + 1 final summary)
            input_data = llm.AITextSummarizerBlock.Input(
                text="This is a short text.",
-                model=llm.LlmModel.GPT4O,
+                model=llm.DEFAULT_LLM_MODEL,
                credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
                max_tokens=1000,  # Large enough to avoid chunking
            )
@@ -346,7 +346,7 @@ class TestLLMStatsTracking:
                {"role": "assistant", "content": "Hi there!"},
                {"role": "user", "content": "How are you?"},
            ],
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

@@ -387,7 +387,7 @@ class TestLLMStatsTracking:
        # Run the block
        input_data = llm.AIListGeneratorBlock.Input(
            focus="test items",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            max_retries=3,
        )
@@ -469,7 +469,7 @@ class TestLLMStatsTracking:
        input_data = llm.AIStructuredResponseGeneratorBlock.Input(
            prompt="Test",
            expected_format={"result": "desc"},
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

@@ -513,7 +513,7 @@ class TestAITextSummarizerValidation:
        # Create input data
        input_data = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            style=llm.SummaryStyle.BULLET_POINTS,
        )
@@ -558,7 +558,7 @@ class TestAITextSummarizerValidation:
        # Create input data
        input_data = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            style=llm.SummaryStyle.BULLET_POINTS,
            max_tokens=1000,
@@ -593,7 +593,7 @@ class TestAITextSummarizerValidation:
        # Create input data
        input_data = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

@@ -623,7 +623,7 @@ class TestAITextSummarizerValidation:
        # Create input data
        input_data = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
            max_tokens=1000,
        )
@@ -654,7 +654,7 @@ class TestAITextSummarizerValidation:
        # Create input data
        input_data = llm.AITextSummarizerBlock.Input(
            text="Some text to summarize",
-            model=llm.LlmModel.GPT4O,
+            model=llm.DEFAULT_LLM_MODEL,
            credentials=llm.TEST_CREDENTIALS_INPUT,  # type: ignore
        )

--- a/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker.py
@@ -233,7 +233,7 @@ async def test_smart_decision_maker_tracks_llm_stats():
        # Create test input
        input_data = SmartDecisionMakerBlock.Input(
            prompt="Should I continue with this task?",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -335,7 +335,7 @@ async def test_smart_decision_maker_parameter_validation():

        input_data = SmartDecisionMakerBlock.Input(
            prompt="Search for keywords",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            retry=2,  # Set retry to 2 for testing
            agent_mode_max_iterations=0,
@@ -402,7 +402,7 @@ async def test_smart_decision_maker_parameter_validation():

        input_data = SmartDecisionMakerBlock.Input(
            prompt="Search for keywords",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -462,7 +462,7 @@ async def test_smart_decision_maker_parameter_validation():

        input_data = SmartDecisionMakerBlock.Input(
            prompt="Search for keywords",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -526,7 +526,7 @@ async def test_smart_decision_maker_parameter_validation():

        input_data = SmartDecisionMakerBlock.Input(
            prompt="Search for keywords",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -648,7 +648,7 @@ async def test_smart_decision_maker_raw_response_conversion():

        input_data = SmartDecisionMakerBlock.Input(
            prompt="Test prompt",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            retry=2,
            agent_mode_max_iterations=0,
@@ -722,7 +722,7 @@ async def test_smart_decision_maker_raw_response_conversion():
    ):
        input_data = SmartDecisionMakerBlock.Input(
            prompt="Simple prompt",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -778,7 +778,7 @@ async def test_smart_decision_maker_raw_response_conversion():
    ):
        input_data = SmartDecisionMakerBlock.Input(
            prompt="Another test",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,
        )
@@ -931,7 +931,7 @@ async def test_smart_decision_maker_agent_mode():
        # Test agent mode with max_iterations = 3
        input_data = SmartDecisionMakerBlock.Input(
            prompt="Complete this task using tools",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=3,  # Enable agent mode with 3 max iterations
        )
@@ -1020,7 +1020,7 @@ async def test_smart_decision_maker_traditional_mode_default():
        # Test default behavior (traditional mode)
        input_data = SmartDecisionMakerBlock.Input(
            prompt="Test prompt",
-            model=llm_module.LlmModel.GPT4O,
+            model=llm_module.DEFAULT_LLM_MODEL,
            credentials=llm_module.TEST_CREDENTIALS_INPUT,  # type: ignore
            agent_mode_max_iterations=0,  # Traditional mode
        )
--- a/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker_dynamic_fields.py
+++ b/autogpt_platform/backend/backend/blocks/test/test_smart_decision_maker_dynamic_fields.py
@@ -373,7 +373,7 @@ async def test_output_yielding_with_dynamic_fields():
            input_data = block.input_schema(
                prompt="Create a user dictionary",
                credentials=llm.TEST_CREDENTIALS_INPUT,
-                model=llm.LlmModel.GPT4O,
+                model=llm.DEFAULT_LLM_MODEL,
                agent_mode_max_iterations=0,  # Use traditional mode to test output yielding
            )

@@ -594,7 +594,7 @@ async def test_validation_errors_dont_pollute_conversation():
                input_data = block.input_schema(
                    prompt="Test prompt",
                    credentials=llm.TEST_CREDENTIALS_INPUT,
-                    model=llm.LlmModel.GPT4O,
+                    model=llm.DEFAULT_LLM_MODEL,
                    retry=3,  # Allow retries
                    agent_mode_max_iterations=1,
                )
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -59,12 +59,13 @@ from backend.integrations.credentials_store import (

 MODEL_COST: dict[LlmModel, int] = {
    LlmModel.O3: 4,
-    LlmModel.O3_MINI: 2,  # $1.10 / $4.40
-    LlmModel.O1: 16,  # $15 / $60
+    LlmModel.O3_MINI: 2,
+    LlmModel.O1: 16,
    LlmModel.O1_MINI: 4,
    # GPT-5 models
-    LlmModel.GPT5: 2,
+    LlmModel.GPT5_2: 6,
    LlmModel.GPT5_1: 5,
+    LlmModel.GPT5: 2,
    LlmModel.GPT5_MINI: 1,
    LlmModel.GPT5_NANO: 1,
    LlmModel.GPT5_CHAT: 5,
@@ -87,7 +88,7 @@ MODEL_COST: dict[LlmModel, int] = {
    LlmModel.AIML_API_LLAMA3_3_70B: 1,
    LlmModel.AIML_API_META_LLAMA_3_1_70B: 1,
    LlmModel.AIML_API_LLAMA_3_2_3B: 1,
-    LlmModel.LLAMA3_3_70B: 1,  # $0.59 / $0.79
+    LlmModel.LLAMA3_3_70B: 1,
    LlmModel.LLAMA3_1_8B: 1,
    LlmModel.OLLAMA_LLAMA3_3: 1,
    LlmModel.OLLAMA_LLAMA3_2: 1,