feat(backend/blocks): Add GPT-5.1 and GPT-5.1-codex (#11406)

This PR adds the latest gpt-5.1 and gpt-5.1-codex LLMs from OpenAI and updates the price of the gpt-5-chat model. https://platform.openai.com/docs/models/gpt-5.1 https://platform.openai.com/docs/models/gpt-5.1-codex I have also added a new Codex block, because gpt-5.1-codex uses a different OpenAI API and has options the main LLMs don't use. <img width="231" height="755" alt="image" src="https://github.com/user-attachments/assets/a4056633-7b0f-446f-ae86-d7755c5b88ec" /> #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan:  - [x] Test the latest gpt-5.1 llm - [x] Test the latest gpt-5.1-codex block --------- Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co> Co-authored-by: Claude <noreply@anthropic.com>
2026-04-08 03:00:28 -04:00 · 2025-11-25 01:33:11 -08:00
parent 5d97706bb8
commit 64a775dfa7
3 changed files with 243 additions and 1 deletions
--- a/autogpt_platform/backend/backend/blocks/codex.py
+++ b/autogpt_platform/backend/backend/blocks/codex.py
@@ -0,0 +1,224 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Literal
+
+from openai import AsyncOpenAI
+from openai.types.responses import Response as OpenAIResponse
+from pydantic import SecretStr
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    NodeExecutionStats,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+
+
+@dataclass
+class CodexCallResult:
+    """Structured response returned by Codex invocations.
+
+    This is the value produced by ``CodeGenerationBlock.call_codex`` and
+    unpacked by ``CodeGenerationBlock.run`` into the block's three outputs.
+    All fields are plain strings; a missing value is represented as ``""``.
+    """
+
+    # Generated text taken from the API response's ``output_text``.
+    response: str
+    # Reasoning summary rendered as a string; empty when none is available.
+    reasoning: str
+    # Identifier of the Responses API call.
+    response_id: str
+
+
+class CodexModel(str, Enum):
+    """Codex-capable OpenAI models.
+
+    Each member's value is the model identifier string that is sent as the
+    ``model`` entry of the Responses API request.
+    """
+
+    GPT5_1_CODEX = "gpt-5.1-codex"
+
+
+class CodexReasoningEffort(str, Enum):
+    """Configuration for the Responses API reasoning effort.
+
+    ``NONE`` is handled by the caller rather than forwarded: when it is
+    selected, no ``reasoning`` entry is added to the request at all. Every
+    other member's value is sent as the ``effort`` of the ``reasoning`` entry.
+    """
+
+    NONE = "none"
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+CodexCredentials = CredentialsMetaInput[
+    Literal[ProviderName.OPENAI], Literal["api_key"]
+]
+
+TEST_CREDENTIALS = APIKeyCredentials(
+    id="e2fcb203-3f2d-4ad4-a344-8df3bc7db36b",
+    provider="openai",
+    api_key=SecretStr("mock-openai-api-key"),
+    title="Mock OpenAI API key",
+    expires_at=None,
+)
+TEST_CREDENTIALS_INPUT = {
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    "title": TEST_CREDENTIALS.title,
+}
+
+
+def CodexCredentialsField() -> CodexCredentials:
+    return CredentialsField(
+        description="OpenAI API key with access to Codex models (Responses API).",
+    )
+
+
+class CodeGenerationBlock(Block):
+    """Block that talks to Codex models via the OpenAI Responses API."""
+
+    class Input(BlockSchemaInput):
+        prompt: str = SchemaField(
+            description="Primary coding request passed to the Codex model.",
+            placeholder="Generate a Python function that reverses a list.",
+        )
+        system_prompt: str = SchemaField(
+            title="System Prompt",
+            default=(
+                "You are Codex, an elite software engineer. "
+                "Favor concise, working code and highlight important caveats."
+            ),
+            description="Optional instructions injected via the Responses API instructions field.",
+            advanced=True,
+        )
+        model: CodexModel = SchemaField(
+            title="Codex Model",
+            default=CodexModel.GPT5_1_CODEX,
+            description="Codex-optimized model served via the Responses API.",
+            advanced=False,
+        )
+        reasoning_effort: CodexReasoningEffort = SchemaField(
+            title="Reasoning Effort",
+            default=CodexReasoningEffort.MEDIUM,
+            description="Controls the Responses API reasoning budget. Select 'none' to skip reasoning configs.",
+            advanced=True,
+        )
+        max_output_tokens: int | None = SchemaField(
+            title="Max Output Tokens",
+            default=2048,
+            description="Upper bound for generated tokens (hard limit 128,000). Leave blank to let OpenAI decide.",
+            advanced=True,
+        )
+        credentials: CodexCredentials = CodexCredentialsField()
+
+    class Output(BlockSchemaOutput):
+        response: str = SchemaField(
+            description="Code-focused response returned by the Codex model."
+        )
+        reasoning: str = SchemaField(
+            description="Reasoning summary returned by the model, if available.",
+            default="",
+        )
+        response_id: str = SchemaField(
+            description="ID of the Responses API call for auditing/debugging.",
+            default="",
+        )
+
+    def __init__(self):
+        super().__init__(
+            id="86a2a099-30df-47b4-b7e4-34ae5f83e0d5",
+            description="Generate or refactor code using OpenAI's Codex (Responses API).",
+            categories={BlockCategory.AI, BlockCategory.DEVELOPER_TOOLS},
+            input_schema=CodeGenerationBlock.Input,
+            output_schema=CodeGenerationBlock.Output,
+            test_input=[
+                {
+                    "prompt": "Write a TypeScript function that deduplicates an array.",
+                    "credentials": TEST_CREDENTIALS_INPUT,
+                }
+            ],
+            test_output=[
+                ("response", str),
+                ("reasoning", str),
+                ("response_id", str),
+            ],
+            test_mock={
+                "call_codex": lambda *_args, **_kwargs: CodexCallResult(
+                    response="function dedupe<T>(items: T[]): T[] { return [...new Set(items)]; }",
+                    reasoning="Used Set to remove duplicates in O(n).",
+                    response_id="resp_test",
+                )
+            },
+            test_credentials=TEST_CREDENTIALS,
+        )
+        self.execution_stats = NodeExecutionStats()
+
+    async def call_codex(
+        self,
+        *,
+        credentials: APIKeyCredentials,
+        model: CodexModel,
+        prompt: str,
+        system_prompt: str,
+        max_output_tokens: int | None,
+        reasoning_effort: CodexReasoningEffort,
+    ) -> CodexCallResult:
+        """Invoke the OpenAI Responses API."""
+        client = AsyncOpenAI(api_key=credentials.api_key.get_secret_value())
+
+        request_payload: dict[str, Any] = {
+            "model": model.value,
+            "input": prompt,
+        }
+        if system_prompt:
+            request_payload["instructions"] = system_prompt
+        if max_output_tokens is not None:
+            request_payload["max_output_tokens"] = max_output_tokens
+        if reasoning_effort != CodexReasoningEffort.NONE:
+            request_payload["reasoning"] = {"effort": reasoning_effort.value}
+
+        response = await client.responses.create(**request_payload)
+        if not isinstance(response, OpenAIResponse):
+            raise TypeError(f"Expected OpenAIResponse, got {type(response).__name__}")
+
+        # Extract data directly from typed response
+        text_output = response.output_text or ""
+        reasoning_summary = (
+            str(response.reasoning.summary)
+            if response.reasoning and response.reasoning.summary
+            else ""
+        )
+        response_id = response.id or ""
+
+        # Update usage stats
+        self.execution_stats.input_token_count = (
+            response.usage.input_tokens if response.usage else 0
+        )
+        self.execution_stats.output_token_count = (
+            response.usage.output_tokens if response.usage else 0
+        )
+        self.execution_stats.llm_call_count += 1
+
+        return CodexCallResult(
+            response=text_output,
+            reasoning=reasoning_summary,
+            response_id=response_id,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        **_kwargs,
+    ) -> BlockOutput:
+        result = await self.call_codex(
+            credentials=credentials,
+            model=input_data.model,
+            prompt=input_data.prompt,
+            system_prompt=input_data.system_prompt,
+            max_output_tokens=input_data.max_output_tokens,
+            reasoning_effort=input_data.reasoning_effort,
+        )
+
+        yield "response", result.response
+        yield "reasoning", result.reasoning
+        yield "response_id", result.response_id
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -93,6 +93,7 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
    O1_MINI = "o1-mini"
    # GPT-5 models
    GPT5 = "gpt-5-2025-08-07"
+    GPT5_1 = "gpt-5.1-2025-11-13"
    GPT5_MINI = "gpt-5-mini-2025-08-07"
    GPT5_NANO = "gpt-5-nano-2025-08-07"
    GPT5_CHAT = "gpt-5-chat-latest"
@@ -194,6 +195,7 @@ MODEL_METADATA = {
    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
    # GPT-5 models
    LlmModel.GPT5: ModelMetadata("openai", 400000, 128000),
+    LlmModel.GPT5_1: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_MINI: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_NANO: ModelMetadata("openai", 400000, 128000),
    LlmModel.GPT5_CHAT: ModelMetadata("openai", 400000, 16384),
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -11,6 +11,7 @@ from backend.blocks.ai_shortform_video_block import (
 from backend.blocks.apollo.organization import SearchOrganizationsBlock
 from backend.blocks.apollo.people import SearchPeopleBlock
 from backend.blocks.apollo.person import GetPersonDetailBlock
+from backend.blocks.codex import CodeGenerationBlock, CodexModel
 from backend.blocks.enrichlayer.linkedin import (
    GetLinkedinProfileBlock,
    GetLinkedinProfilePictureBlock,
@@ -63,9 +64,10 @@ MODEL_COST: dict[LlmModel, int] = {
    LlmModel.O1_MINI: 4,
    # GPT-5 models
    LlmModel.GPT5: 2,
+    LlmModel.GPT5_1: 5,
    LlmModel.GPT5_MINI: 1,
    LlmModel.GPT5_NANO: 1,
-    LlmModel.GPT5_CHAT: 2,
+    LlmModel.GPT5_CHAT: 5,
    LlmModel.GPT41: 2,
    LlmModel.GPT41_MINI: 1,
    LlmModel.GPT4O_MINI: 1,
@@ -265,6 +267,20 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
    AIStructuredResponseGeneratorBlock: LLM_COST,
    AITextSummarizerBlock: LLM_COST,
    AIListGeneratorBlock: LLM_COST,
+    CodeGenerationBlock: [
+        BlockCost(
+            cost_type=BlockCostType.RUN,
+            cost_filter={
+                "model": CodexModel.GPT5_1_CODEX,
+                "credentials": {
+                    "id": openai_credentials.id,
+                    "provider": openai_credentials.provider,
+                    "type": openai_credentials.type,
+                },
+            },
+            cost_amount=5,
+        )
+    ],
    CreateTalkingAvatarVideoBlock: [
        BlockCost(
            cost_amount=15,