Files
AutoGPT/autogpt_platform/backend/backend/blocks/codex.py
Bently 64a775dfa7 feat(backend/blocks): Add GPT-5.1 and GPT-5.1-codex (#11406)
This pr adds the latest gpt-5.1 and gpt-5.1-codex llm's from openai, as
well as update the price of the gpt-5-chat model

https://platform.openai.com/docs/models/gpt-5.1
https://platform.openai.com/docs/models/gpt-5.1-codex

I have also had to add a new codex block as it uses a different openai
API and has other options the main llm's dont use

<img width="231" height="755" alt="image"
src="https://github.com/user-attachments/assets/a4056633-7b0f-446f-ae86-d7755c5b88ec"
/>


#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] Test the latest gpt-5.1 llm
  - [x] Test the latest gpt-5.1-codex block

---------

Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>
Co-authored-by: Claude <noreply@anthropic.com>
2025-11-25 09:33:11 +00:00

225 lines
7.2 KiB
Python

from dataclasses import dataclass
from enum import Enum
from typing import Any, Literal
from openai import AsyncOpenAI
from openai.types.responses import Response as OpenAIResponse
from pydantic import SecretStr
from backend.data.block import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.model import (
APIKeyCredentials,
CredentialsField,
CredentialsMetaInput,
NodeExecutionStats,
SchemaField,
)
from backend.integrations.providers import ProviderName
@dataclass
class CodexCallResult:
"""Structured response returned by Codex invocations."""
response: str
reasoning: str
response_id: str
class CodexModel(str, Enum):
"""Codex-capable OpenAI models."""
GPT5_1_CODEX = "gpt-5.1-codex"
class CodexReasoningEffort(str, Enum):
"""Configuration for the Responses API reasoning effort."""
NONE = "none"
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
CodexCredentials = CredentialsMetaInput[
Literal[ProviderName.OPENAI], Literal["api_key"]
]
TEST_CREDENTIALS = APIKeyCredentials(
id="e2fcb203-3f2d-4ad4-a344-8df3bc7db36b",
provider="openai",
api_key=SecretStr("mock-openai-api-key"),
title="Mock OpenAI API key",
expires_at=None,
)
TEST_CREDENTIALS_INPUT = {
"provider": TEST_CREDENTIALS.provider,
"id": TEST_CREDENTIALS.id,
"type": TEST_CREDENTIALS.type,
"title": TEST_CREDENTIALS.title,
}
def CodexCredentialsField() -> CodexCredentials:
return CredentialsField(
description="OpenAI API key with access to Codex models (Responses API).",
)
class CodeGenerationBlock(Block):
"""Block that talks to Codex models via the OpenAI Responses API."""
class Input(BlockSchemaInput):
prompt: str = SchemaField(
description="Primary coding request passed to the Codex model.",
placeholder="Generate a Python function that reverses a list.",
)
system_prompt: str = SchemaField(
title="System Prompt",
default=(
"You are Codex, an elite software engineer. "
"Favor concise, working code and highlight important caveats."
),
description="Optional instructions injected via the Responses API instructions field.",
advanced=True,
)
model: CodexModel = SchemaField(
title="Codex Model",
default=CodexModel.GPT5_1_CODEX,
description="Codex-optimized model served via the Responses API.",
advanced=False,
)
reasoning_effort: CodexReasoningEffort = SchemaField(
title="Reasoning Effort",
default=CodexReasoningEffort.MEDIUM,
description="Controls the Responses API reasoning budget. Select 'none' to skip reasoning configs.",
advanced=True,
)
max_output_tokens: int | None = SchemaField(
title="Max Output Tokens",
default=2048,
description="Upper bound for generated tokens (hard limit 128,000). Leave blank to let OpenAI decide.",
advanced=True,
)
credentials: CodexCredentials = CodexCredentialsField()
class Output(BlockSchemaOutput):
response: str = SchemaField(
description="Code-focused response returned by the Codex model."
)
reasoning: str = SchemaField(
description="Reasoning summary returned by the model, if available.",
default="",
)
response_id: str = SchemaField(
description="ID of the Responses API call for auditing/debugging.",
default="",
)
def __init__(self):
super().__init__(
id="86a2a099-30df-47b4-b7e4-34ae5f83e0d5",
description="Generate or refactor code using OpenAI's Codex (Responses API).",
categories={BlockCategory.AI, BlockCategory.DEVELOPER_TOOLS},
input_schema=CodeGenerationBlock.Input,
output_schema=CodeGenerationBlock.Output,
test_input=[
{
"prompt": "Write a TypeScript function that deduplicates an array.",
"credentials": TEST_CREDENTIALS_INPUT,
}
],
test_output=[
("response", str),
("reasoning", str),
("response_id", str),
],
test_mock={
"call_codex": lambda *_args, **_kwargs: CodexCallResult(
response="function dedupe<T>(items: T[]): T[] { return [...new Set(items)]; }",
reasoning="Used Set to remove duplicates in O(n).",
response_id="resp_test",
)
},
test_credentials=TEST_CREDENTIALS,
)
self.execution_stats = NodeExecutionStats()
async def call_codex(
self,
*,
credentials: APIKeyCredentials,
model: CodexModel,
prompt: str,
system_prompt: str,
max_output_tokens: int | None,
reasoning_effort: CodexReasoningEffort,
) -> CodexCallResult:
"""Invoke the OpenAI Responses API."""
client = AsyncOpenAI(api_key=credentials.api_key.get_secret_value())
request_payload: dict[str, Any] = {
"model": model.value,
"input": prompt,
}
if system_prompt:
request_payload["instructions"] = system_prompt
if max_output_tokens is not None:
request_payload["max_output_tokens"] = max_output_tokens
if reasoning_effort != CodexReasoningEffort.NONE:
request_payload["reasoning"] = {"effort": reasoning_effort.value}
response = await client.responses.create(**request_payload)
if not isinstance(response, OpenAIResponse):
raise TypeError(f"Expected OpenAIResponse, got {type(response).__name__}")
# Extract data directly from typed response
text_output = response.output_text or ""
reasoning_summary = (
str(response.reasoning.summary)
if response.reasoning and response.reasoning.summary
else ""
)
response_id = response.id or ""
# Update usage stats
self.execution_stats.input_token_count = (
response.usage.input_tokens if response.usage else 0
)
self.execution_stats.output_token_count = (
response.usage.output_tokens if response.usage else 0
)
self.execution_stats.llm_call_count += 1
return CodexCallResult(
response=text_output,
reasoning=reasoning_summary,
response_id=response_id,
)
async def run(
self,
input_data: Input,
*,
credentials: APIKeyCredentials,
**_kwargs,
) -> BlockOutput:
result = await self.call_codex(
credentials=credentials,
model=input_data.model,
prompt=input_data.prompt,
system_prompt=input_data.system_prompt,
max_output_tokens=input_data.max_output_tokens,
reasoning_effort=input_data.reasoning_effort,
)
yield "response", result.response
yield "reasoning", result.reasoning
yield "response_id", result.response_id