Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-02-13 08:14:58 -05:00)
Compare commits: feat/copil...feat/opena (1 commit: 889b4e4152)
@@ -32,6 +32,14 @@ from backend.data.model import (
 from backend.integrations.providers import ProviderName
 from backend.util import json
 from backend.util.logging import TruncatedLogger
+from backend.util.openai_responses import (
+    convert_tools_to_responses_format,
+    extract_responses_content,
+    extract_responses_reasoning,
+    extract_responses_tool_calls,
+    extract_usage,
+    requires_responses_api,
+)
 from backend.util.prompt import compress_context, estimate_token_count
 from backend.util.text import TextFormatter
 
@@ -659,38 +667,72 @@ async def llm_call(
     max_tokens = max(min(available_tokens, model_max_output, user_max), 1)
 
     if provider == "openai":
-        tools_param = tools if tools else openai.NOT_GIVEN
         oai_client = openai.AsyncOpenAI(api_key=credentials.api_key.get_secret_value())
-        response_format = None
 
-        parallel_tool_calls = get_parallel_tool_calls_param(
-            llm_model, parallel_tool_calls
-        )
-
-        if force_json_output:
-            response_format = {"type": "json_object"}
-
-        response = await oai_client.chat.completions.create(
-            model=llm_model.value,
-            messages=prompt,  # type: ignore
-            response_format=response_format,  # type: ignore
-            max_completion_tokens=max_tokens,
-            tools=tools_param,  # type: ignore
-            parallel_tool_calls=parallel_tool_calls,
-        )
-
-        tool_calls = extract_openai_tool_calls(response)
-        reasoning = extract_openai_reasoning(response)
-
-        return LLMResponse(
-            raw_response=response.choices[0].message,
-            prompt=prompt,
-            response=response.choices[0].message.content or "",
-            tool_calls=tool_calls,
-            prompt_tokens=response.usage.prompt_tokens if response.usage else 0,
-            completion_tokens=response.usage.completion_tokens if response.usage else 0,
-            reasoning=reasoning,
-        )
+        # Check if this model requires the Responses API (reasoning models: o1, o3, etc.)
+        if requires_responses_api(llm_model.value):
+            # Use responses.create for reasoning models
+            tools_converted = (
+                convert_tools_to_responses_format(tools) if tools else None
+            )
+
+            response = await oai_client.responses.create(
+                model=llm_model.value,
+                input=prompt,  # type: ignore
+                tools=tools_converted,  # type: ignore
+                max_output_tokens=max_tokens,
+                store=False,  # Don't persist conversations
+            )
+
+            tool_calls = extract_responses_tool_calls(response)
+            reasoning = extract_responses_reasoning(response)
+            content = extract_responses_content(response)
+            prompt_tokens, completion_tokens = extract_usage(response, True)
+
+            return LLMResponse(
+                raw_response=response,
+                prompt=prompt,
+                response=content,
+                tool_calls=tool_calls,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                reasoning=reasoning,
+            )
+        else:
+            # Use chat.completions.create for standard models
+            tools_param = tools if tools else openai.NOT_GIVEN
+            response_format = None
+
+            parallel_tool_calls = get_parallel_tool_calls_param(
+                llm_model, parallel_tool_calls
+            )
+
+            if force_json_output:
+                response_format = {"type": "json_object"}
+
+            response = await oai_client.chat.completions.create(
+                model=llm_model.value,
+                messages=prompt,  # type: ignore
+                response_format=response_format,  # type: ignore
+                max_completion_tokens=max_tokens,
+                tools=tools_param,  # type: ignore
+                parallel_tool_calls=parallel_tool_calls,
+            )
+
+            tool_calls = extract_openai_tool_calls(response)
+            reasoning = extract_openai_reasoning(response)
+
+            return LLMResponse(
+                raw_response=response.choices[0].message,
+                prompt=prompt,
+                response=response.choices[0].message.content or "",
+                tool_calls=tool_calls,
+                prompt_tokens=response.usage.prompt_tokens if response.usage else 0,
+                completion_tokens=(
+                    response.usage.completion_tokens if response.usage else 0
+                ),
+                reasoning=reasoning,
+            )
     elif provider == "anthropic":
 
         an_tools = convert_openai_tool_fmt_to_anthropic(tools)
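For reference, the routing this hunk introduces can be exercised on its own. Below is a minimal sketch, not the PR's code: the helpers come from the new backend/util/openai_responses.py shown further down, the client calls are the official openai Python SDK, and the model name, API key, and prompt are illustrative.

# Minimal sketch of the two-API routing added above. Assumes the helpers from
# backend/util/openai_responses.py; model, API key, and prompt are illustrative.
import asyncio

import openai

from backend.util.openai_responses import extract_usage, requires_responses_api


async def demo(model: str, api_key: str) -> None:
    client = openai.AsyncOpenAI(api_key=api_key)
    messages = [{"role": "user", "content": "Say hello."}]

    if requires_responses_api(model):
        # Reasoning models (o1/o3 families) go through responses.create.
        response = await client.responses.create(
            model=model, input=messages, store=False
        )
        print(response.output_text, extract_usage(response, True))
    else:
        # Everything else stays on chat.completions.create.
        response = await client.chat.completions.create(
            model=model, messages=messages
        )
        print(response.choices[0].message.content, extract_usage(response, False))


# asyncio.run(demo("o3-mini", "sk-..."))  # or e.g. "gpt-4o"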
autogpt_platform/backend/backend/util/openai_responses.py (new file, 185 lines)
@@ -0,0 +1,185 @@
"""Helpers for OpenAI Responses API migration.

This module provides utilities for conditionally using OpenAI's Responses API
instead of Chat Completions for reasoning models (o1, o3, etc.) that require it.
"""

from typing import Any

# Exact model identifiers that require the Responses API.
# Use exact matching to avoid false positives on future models.
# NOTE: Update this set when OpenAI releases new reasoning models.
REASONING_MODELS = frozenset(
    {
        # O1 family
        "o1",
        "o1-mini",
        "o1-preview",
        "o1-2024-12-17",
        # O3 family
        "o3",
        "o3-mini",
        "o3-2025-04-16",
        "o3-mini-2025-01-31",
    }
)


def requires_responses_api(model: str) -> bool:
    """Check if model requires the Responses API (exact match).

    Args:
        model: The model identifier string (e.g., "o3-mini", "gpt-4o")

    Returns:
        True if the model requires responses.create, False otherwise
    """
    return model in REASONING_MODELS


def convert_tools_to_responses_format(tools: list[dict] | None) -> list[dict]:
    """Convert Chat Completions tool format to Responses API format.

    The Responses API uses internally-tagged polymorphism (flatter structure)
    and functions are strict by default.

    Chat Completions format:
        {"type": "function", "function": {"name": "...", "parameters": {...}}}

    Responses API format:
        {"type": "function", "name": "...", "parameters": {...}}

    Args:
        tools: List of tools in Chat Completions format

    Returns:
        List of tools in Responses API format
    """
    if not tools:
        return []

    converted = []
    for tool in tools:
        if tool.get("type") == "function":
            func = tool.get("function", {})
            converted.append(
                {
                    "type": "function",
                    "name": func.get("name"),
                    "description": func.get("description"),
                    "parameters": func.get("parameters"),
                    # Note: strict=True is default in Responses API
                }
            )
        else:
            # Pass through non-function tools as-is
            converted.append(tool)
    return converted


def extract_responses_tool_calls(response: Any) -> list[dict] | None:
    """Extract tool calls from Responses API response.

    The Responses API returns tool calls as separate items in the output array
    with type="function_call".

    Args:
        response: The Responses API response object

    Returns:
        List of tool calls in a normalized format, or None if no tool calls
    """
    tool_calls = []
    for item in response.output:
        if getattr(item, "type", None) == "function_call":
            tool_calls.append(
                {
                    "id": item.call_id,
                    "type": "function",
                    "function": {
                        "name": item.name,
                        "arguments": item.arguments,
                    },
                }
            )
    return tool_calls if tool_calls else None


def extract_usage(response: Any, is_responses_api: bool) -> tuple[int, int]:
    """Extract token usage from either API response.

    The Responses API uses different field names for token counts:
    - Chat Completions: prompt_tokens, completion_tokens
    - Responses API: input_tokens, output_tokens

    Args:
        response: The API response object
        is_responses_api: True if response is from Responses API

    Returns:
        Tuple of (prompt_tokens, completion_tokens)
    """
    if not response.usage:
        return 0, 0

    if is_responses_api:
        # Responses API uses different field names
        return (
            getattr(response.usage, "input_tokens", 0),
            getattr(response.usage, "output_tokens", 0),
        )
    else:
        # Chat Completions API
        return (
            getattr(response.usage, "prompt_tokens", 0),
            getattr(response.usage, "completion_tokens", 0),
        )


def extract_responses_content(response: Any) -> str:
    """Extract text content from Responses API response.

    Args:
        response: The Responses API response object

    Returns:
        The text content from the response, or empty string if none
    """
    # The SDK provides a helper property
    if hasattr(response, "output_text"):
        return response.output_text or ""

    # Fallback: manually extract from output items
    for item in response.output:
        if getattr(item, "type", None) == "message":
            for content in getattr(item, "content", []):
                if getattr(content, "type", None) == "output_text":
                    return getattr(content, "text", "")
    return ""


def extract_responses_reasoning(response: Any) -> str | None:
    """Extract reasoning content from Responses API response.

    Reasoning models return their reasoning process in the response,
    which can be useful for debugging or display.

    Args:
        response: The Responses API response object

    Returns:
        The reasoning text, or None if not present
    """
    for item in response.output:
        if getattr(item, "type", None) == "reasoning":
            # Reasoning items may have summary or content
            summary = getattr(item, "summary", [])
            if summary:
                # Join summary items if present
                texts = []
                for s in summary:
                    if hasattr(s, "text"):
                        texts.append(s.text)
                if texts:
                    return "\n".join(texts)
    return None
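As a quick illustration of how the conversion and extraction helpers behave, the sketch below feeds them hand-built stand-ins for Responses API objects. The SimpleNamespace stubs and their field values are invented for illustration; only the imported helpers come from the file above.

# Illustrative stubs only: SimpleNamespace stands in for the SDK's response
# objects; the imported helpers are the ones defined above.
from types import SimpleNamespace

from backend.util.openai_responses import (
    convert_tools_to_responses_format,
    extract_responses_tool_calls,
    extract_usage,
)

# Flatten a Chat Completions-style tool into the Responses format.
chat_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the weather in a location",
        "parameters": {"type": "object", "properties": {}},
    },
}
print(convert_tools_to_responses_format([chat_tool]))
# -> [{"type": "function", "name": "get_weather",
#      "description": "Get the weather in a location",
#      "parameters": {"type": "object", "properties": {}}}]

# A stub response carrying one function_call output item plus usage counts.
stub = SimpleNamespace(
    output=[
        SimpleNamespace(
            type="function_call",
            call_id="call_1",
            name="get_weather",
            arguments='{"location": "Berlin"}',
        )
    ],
    usage=SimpleNamespace(input_tokens=12, output_tokens=3),
)
print(extract_responses_tool_calls(stub))  # normalized to Chat Completions shape
print(extract_usage(stub, True))           # (12, 3)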
autogpt_platform/backend/backend/util/openai_responses_test.py (new file, 155 lines)
@@ -0,0 +1,155 @@
"""Tests for OpenAI Responses API helpers."""

import pytest

from backend.util.openai_responses import (
    REASONING_MODELS,
    convert_tools_to_responses_format,
    requires_responses_api,
)


class TestRequiresResponsesApi:
    """Tests for the requires_responses_api function."""

    def test_o1_models_require_responses_api(self):
        """O1 family models should require the Responses API."""
        assert requires_responses_api("o1") is True
        assert requires_responses_api("o1-mini") is True
        assert requires_responses_api("o1-preview") is True
        assert requires_responses_api("o1-2024-12-17") is True

    def test_o3_models_require_responses_api(self):
        """O3 family models should require the Responses API."""
        assert requires_responses_api("o3") is True
        assert requires_responses_api("o3-mini") is True
        assert requires_responses_api("o3-2025-04-16") is True
        assert requires_responses_api("o3-mini-2025-01-31") is True

    def test_gpt_models_do_not_require_responses_api(self):
        """GPT models should NOT require the Responses API."""
        assert requires_responses_api("gpt-4o") is False
        assert requires_responses_api("gpt-4o-mini") is False
        assert requires_responses_api("gpt-4-turbo") is False
        assert requires_responses_api("gpt-3.5-turbo") is False
        assert requires_responses_api("gpt-5") is False
        assert requires_responses_api("gpt-5-mini") is False

    def test_other_models_do_not_require_responses_api(self):
        """Other provider models should NOT require the Responses API."""
        assert requires_responses_api("claude-3-opus") is False
        assert requires_responses_api("llama-3.3-70b") is False
        assert requires_responses_api("gemini-pro") is False

    def test_empty_string_does_not_require_responses_api(self):
        """Empty string should not require the Responses API."""
        assert requires_responses_api("") is False

    def test_exact_matching_no_false_positives(self):
        """Should not match models that just start with 'o1' or 'o3'."""
        # These are hypothetical models that start with o1/o3 but aren't
        # actually reasoning models
        assert requires_responses_api("o1-turbo-hypothetical") is False
        assert requires_responses_api("o3-fast-hypothetical") is False
        assert requires_responses_api("o100") is False


class TestConvertToolsToResponsesFormat:
    """Tests for the convert_tools_to_responses_format function."""

    def test_empty_tools_returns_empty_list(self):
        """Empty or None tools should return empty list."""
        assert convert_tools_to_responses_format(None) == []
        assert convert_tools_to_responses_format([]) == []

    def test_converts_function_tool_format(self):
        """Should convert Chat Completions function format to Responses format."""
        chat_completions_tools = [
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the weather in a location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {"type": "string"},
                        },
                        "required": ["location"],
                    },
                },
            }
        ]

        result = convert_tools_to_responses_format(chat_completions_tools)

        assert len(result) == 1
        assert result[0]["type"] == "function"
        assert result[0]["name"] == "get_weather"
        assert result[0]["description"] == "Get the weather in a location"
        assert result[0]["parameters"] == {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
            },
            "required": ["location"],
        }
        # Should not have nested "function" key
        assert "function" not in result[0]

    def test_handles_multiple_tools(self):
        """Should handle multiple tools."""
        chat_completions_tools = [
            {
                "type": "function",
                "function": {
                    "name": "tool_1",
                    "description": "First tool",
                    "parameters": {"type": "object", "properties": {}},
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "tool_2",
                    "description": "Second tool",
                    "parameters": {"type": "object", "properties": {}},
                },
            },
        ]

        result = convert_tools_to_responses_format(chat_completions_tools)

        assert len(result) == 2
        assert result[0]["name"] == "tool_1"
        assert result[1]["name"] == "tool_2"

    def test_passes_through_non_function_tools(self):
        """Non-function tools should be passed through as-is."""
        tools = [{"type": "web_search", "config": {"enabled": True}}]

        result = convert_tools_to_responses_format(tools)

        assert result == tools


class TestReasoningModelsSet:
    """Tests for the REASONING_MODELS constant."""

    def test_reasoning_models_is_frozenset(self):
        """REASONING_MODELS should be a frozenset (immutable)."""
        assert isinstance(REASONING_MODELS, frozenset)

    def test_contains_expected_models(self):
        """Should contain all expected reasoning models."""
        expected = {
            "o1",
            "o1-mini",
            "o1-preview",
            "o1-2024-12-17",
            "o3",
            "o3-mini",
            "o3-2025-04-16",
            "o3-mini-2025-01-31",
        }
        assert expected.issubset(REASONING_MODELS)
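The suite above covers routing and tool conversion; the content and reasoning extraction helpers can be checked the same way with shaped stubs. Again, the SimpleNamespace objects below are illustrative stand-ins for SDK types, not part of the PR.

# Illustrative stubs shaped like Responses API "reasoning" and "message" items.
from types import SimpleNamespace

from backend.util.openai_responses import (
    extract_responses_content,
    extract_responses_reasoning,
)

stub = SimpleNamespace(
    output=[
        SimpleNamespace(
            type="reasoning",
            summary=[SimpleNamespace(text="Compared both options.")],
        ),
        SimpleNamespace(
            type="message",
            content=[SimpleNamespace(type="output_text", text="Option A.")],
        ),
    ],
)

# The stub has no output_text attribute, so the manual fallback path runs.
print(extract_responses_content(stub))    # "Option A."
print(extract_responses_reasoning(stub))  # "Compared both options."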