Compare commits

...

2 Commits

Author SHA1 Message Date
Krzysztof Czerwinski
f2200c306a Migrate to responses 2026-02-19 19:13:02 +09:00
Otto
889b4e4152 feat(platform): update OpenAI calls to use responses.create for reasoning models
Adds conditional support for OpenAI's Responses API for reasoning models
(o1, o3, etc.) that are incompatible with chat.completions.create.

Changes:
- Add openai_responses.py helper module with:
  - requires_chat_completions() for legacy model detection (exact matching)
  - convert_tools_to_responses_format() for tool format conversion
  - extract_responses_tool_calls() for tool call extraction
  - extract_responses_usage() for normalized token usage
  - extract_responses_content() for content extraction
  - extract_responses_reasoning() for reasoning extraction
- Update llm.py OpenAI provider to conditionally use responses.create
  for reasoning models while keeping chat.completions.create for others
- Add unit tests for helper functions

Resolves: #11624
Linear: OPEN-2911
2026-02-13 08:15:42 +00:00
3 changed files with 375 additions and 26 deletions

View File

@@ -32,6 +32,14 @@ from backend.data.model import (
from backend.integrations.providers import ProviderName
from backend.util import json
from backend.util.logging import TruncatedLogger
from backend.util.openai_responses import (
convert_tools_to_responses_format,
extract_responses_content,
extract_responses_reasoning,
extract_responses_tool_calls,
extract_responses_usage,
requires_chat_completions,
)
from backend.util.prompt import compress_context, estimate_token_count
from backend.util.text import TextFormatter
@@ -659,38 +667,79 @@ async def llm_call(
max_tokens = max(min(available_tokens, model_max_output, user_max), 1)
if provider == "openai":
tools_param = tools if tools else openai.NOT_GIVEN
oai_client = openai.AsyncOpenAI(api_key=credentials.api_key.get_secret_value())
response_format = None
parallel_tool_calls = get_parallel_tool_calls_param(
llm_model, parallel_tool_calls
)
if requires_chat_completions(llm_model.value):
# Legacy fallback: gpt-3.5-turbo only (not supported by Responses API)
tools_param = tools if tools else openai.NOT_GIVEN
response_format = None
if force_json_output:
response_format = {"type": "json_object"}
parallel_tool_calls = get_parallel_tool_calls_param(
llm_model, parallel_tool_calls
)
response = await oai_client.chat.completions.create(
model=llm_model.value,
messages=prompt, # type: ignore
response_format=response_format, # type: ignore
max_completion_tokens=max_tokens,
tools=tools_param, # type: ignore
parallel_tool_calls=parallel_tool_calls,
)
if force_json_output:
response_format = {"type": "json_object"}
tool_calls = extract_openai_tool_calls(response)
reasoning = extract_openai_reasoning(response)
response = await oai_client.chat.completions.create(
model=llm_model.value,
messages=prompt, # type: ignore
response_format=response_format, # type: ignore
max_completion_tokens=max_tokens,
tools=tools_param, # type: ignore
parallel_tool_calls=parallel_tool_calls,
)
return LLMResponse(
raw_response=response.choices[0].message,
prompt=prompt,
response=response.choices[0].message.content or "",
tool_calls=tool_calls,
prompt_tokens=response.usage.prompt_tokens if response.usage else 0,
completion_tokens=response.usage.completion_tokens if response.usage else 0,
reasoning=reasoning,
)
tool_calls = extract_openai_tool_calls(response)
reasoning = extract_openai_reasoning(response)
return LLMResponse(
raw_response=response.choices[0].message,
prompt=prompt,
response=response.choices[0].message.content or "",
tool_calls=tool_calls,
prompt_tokens=response.usage.prompt_tokens if response.usage else 0,
completion_tokens=(
response.usage.completion_tokens if response.usage else 0
),
reasoning=reasoning,
)
else:
# Default: All modern OpenAI models use the Responses API
tools_param = (
convert_tools_to_responses_format(tools) if tools else openai.omit
)
text_config = openai.omit
if force_json_output:
text_config = {"format": {"type": "json_object"}} # type: ignore
response = await oai_client.responses.create(
model=llm_model.value,
input=prompt, # type: ignore
tools=tools_param, # type: ignore
max_output_tokens=max_tokens,
parallel_tool_calls=get_parallel_tool_calls_param(
llm_model, parallel_tool_calls
),
text=text_config, # type: ignore
store=False,
)
tool_calls = extract_responses_tool_calls(response)
reasoning = extract_responses_reasoning(response)
content = extract_responses_content(response)
prompt_tokens, completion_tokens = extract_responses_usage(response)
return LLMResponse(
raw_response=response,
prompt=prompt,
response=content,
tool_calls=tool_calls,
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
reasoning=reasoning,
)
elif provider == "anthropic":
an_tools = convert_openai_tool_fmt_to_anthropic(tools)

View File

@@ -0,0 +1,166 @@
"""Helpers for OpenAI Responses API.
This module provides utilities for using OpenAI's Responses API, which is the
default for all modern OpenAI models. Legacy models (gpt-3.5-turbo) that do not
support the Responses API fall back to Chat Completions.
"""
from typing import Any
# Legacy models that do NOT support the Responses API.
# These must use chat.completions.create instead of responses.create.
CHAT_COMPLETIONS_ONLY_MODELS = frozenset(
{
"gpt-3.5-turbo",
"gpt-3.5-turbo-0125",
}
)
def requires_chat_completions(model: str) -> bool:
"""Check if model requires the legacy Chat Completions API (exact match).
Args:
model: The model identifier string (e.g., "gpt-3.5-turbo", "gpt-4o")
Returns:
True if the model requires chat.completions.create, False otherwise
"""
return model in CHAT_COMPLETIONS_ONLY_MODELS
def convert_tools_to_responses_format(tools: list[dict] | None) -> list[dict]:
"""Convert Chat Completions tool format to Responses API format.
The Responses API uses internally-tagged polymorphism (flatter structure)
and functions are strict by default.
Chat Completions format:
{"type": "function", "function": {"name": "...", "parameters": {...}}}
Responses API format:
{"type": "function", "name": "...", "parameters": {...}}
Args:
tools: List of tools in Chat Completions format
Returns:
List of tools in Responses API format
"""
if not tools:
return []
converted = []
for tool in tools:
if tool.get("type") == "function":
func = tool.get("function", {})
converted.append(
{
"type": "function",
"name": func.get("name"),
"description": func.get("description"),
"parameters": func.get("parameters"),
# Note: strict=True is default in Responses API
}
)
else:
# Pass through non-function tools as-is
converted.append(tool)
return converted
def extract_responses_tool_calls(response: Any) -> list[dict] | None:
"""Extract tool calls from Responses API response.
The Responses API returns tool calls as separate items in the output array
with type="function_call".
Args:
response: The Responses API response object
Returns:
List of tool calls in a normalized format, or None if no tool calls
"""
tool_calls = []
for item in response.output:
if getattr(item, "type", None) == "function_call":
tool_calls.append(
{
"id": item.call_id,
"type": "function",
"function": {
"name": item.name,
"arguments": item.arguments,
},
}
)
return tool_calls if tool_calls else None
def extract_responses_usage(response: Any) -> tuple[int, int]:
    """Extract token usage from Responses API response.

    The Responses API uses input_tokens/output_tokens (not
    prompt_tokens/completion_tokens).

    Args:
        response: The Responses API response object

    Returns:
        Tuple of (input_tokens, output_tokens); (0, 0) when usage is missing.
    """
    # Defensive getattr keeps this consistent with the other extractors and
    # avoids AttributeError on response objects without a usage attribute.
    usage = getattr(response, "usage", None)
    if not usage:
        return 0, 0
    # `or 0` guards against SDK usage objects whose fields are present but
    # explicitly None, which would otherwise leak None out of an int tuple.
    return (
        getattr(usage, "input_tokens", 0) or 0,
        getattr(usage, "output_tokens", 0) or 0,
    )
def extract_responses_content(response: Any) -> str:
    """Pull the text content out of a Responses API response.

    Args:
        response: The Responses API response object

    Returns:
        The text content from the response, or empty string if none
    """
    # Prefer the SDK's output_text convenience property when the attribute
    # exists at all (even if its value is None/empty).
    _missing = object()
    sdk_text = getattr(response, "output_text", _missing)
    if sdk_text is not _missing:
        return sdk_text or ""
    # Fallback: walk the output items for the first message's text part.
    for item in response.output:
        if getattr(item, "type", None) != "message":
            continue
        for part in getattr(item, "content", []):
            if getattr(part, "type", None) == "output_text":
                return getattr(part, "text", "")
    return ""
def extract_responses_reasoning(response: Any) -> str | None:
"""Extract reasoning content from Responses API response.
Reasoning models return their reasoning process in the response,
which can be useful for debugging or display.
Args:
response: The Responses API response object
Returns:
The reasoning text, or None if not present
"""
for item in response.output:
if getattr(item, "type", None) == "reasoning":
# Reasoning items may have summary or content
summary = getattr(item, "summary", [])
if summary:
# Join summary items if present
texts = []
for s in summary:
if hasattr(s, "text"):
texts.append(s.text)
if texts:
return "\n".join(texts)
return None

View File

@@ -0,0 +1,134 @@
"""Tests for OpenAI Responses API helpers."""
from backend.util.openai_responses import (
CHAT_COMPLETIONS_ONLY_MODELS,
convert_tools_to_responses_format,
requires_chat_completions,
)
class TestRequiresChatCompletions:
    """Tests for the requires_chat_completions function."""

    def test_gpt35_requires_chat_completions(self):
        """gpt-3.5-turbo models should require Chat Completions API."""
        for legacy in ("gpt-3.5-turbo", "gpt-3.5-turbo-0125"):
            assert requires_chat_completions(legacy) is True

    def test_modern_gpt_models_do_not_require_chat_completions(self):
        """Modern GPT models should NOT require Chat Completions (use Responses API)."""
        modern = (
            "gpt-4o",
            "gpt-4o-mini",
            "gpt-4-turbo",
            "gpt-4.1-2025-04-14",
            "gpt-5-2025-08-07",
            "gpt-5-mini-2025-08-07",
        )
        for model in modern:
            assert requires_chat_completions(model) is False

    def test_reasoning_models_do_not_require_chat_completions(self):
        """Reasoning models should NOT require Chat Completions (use Responses API)."""
        for model in ("o1", "o1-mini", "o3", "o3-mini"):
            assert requires_chat_completions(model) is False

    def test_other_models_do_not_require_chat_completions(self):
        """Other provider models should NOT require Chat Completions."""
        for model in ("claude-3-opus", "llama-3.3-70b", "gemini-pro"):
            assert requires_chat_completions(model) is False

    def test_empty_string_does_not_require_chat_completions(self):
        """Empty string should not require Chat Completions."""
        assert requires_chat_completions("") is False
class TestConvertToolsToResponsesFormat:
    """Tests for the convert_tools_to_responses_format function."""

    @staticmethod
    def _chat_tool(name, description, parameters):
        # Build a tool spec in the nested Chat Completions format.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": parameters,
            },
        }

    def test_empty_tools_returns_empty_list(self):
        """Empty or None tools should return empty list."""
        assert convert_tools_to_responses_format(None) == []
        assert convert_tools_to_responses_format([]) == []

    def test_converts_function_tool_format(self):
        """Should convert Chat Completions function format to Responses format."""
        params = {
            "type": "object",
            "properties": {
                "location": {"type": "string"},
            },
            "required": ["location"],
        }
        tools = [
            self._chat_tool("get_weather", "Get the weather in a location", params)
        ]
        result = convert_tools_to_responses_format(tools)
        assert len(result) == 1
        converted = result[0]
        assert converted["type"] == "function"
        assert converted["name"] == "get_weather"
        assert converted["description"] == "Get the weather in a location"
        assert converted["parameters"] == params
        # The nested "function" wrapper must be flattened away.
        assert "function" not in converted

    def test_handles_multiple_tools(self):
        """Should handle multiple tools."""
        empty_params = {"type": "object", "properties": {}}
        tools = [
            self._chat_tool("tool_1", "First tool", empty_params),
            self._chat_tool("tool_2", "Second tool", empty_params),
        ]
        result = convert_tools_to_responses_format(tools)
        assert len(result) == 2
        assert [tool["name"] for tool in result] == ["tool_1", "tool_2"]

    def test_passes_through_non_function_tools(self):
        """Non-function tools should be passed through as-is."""
        tools = [{"type": "web_search", "config": {"enabled": True}}]
        assert convert_tools_to_responses_format(tools) == tools
class TestChatCompletionsOnlyModels:
    """Tests for the CHAT_COMPLETIONS_ONLY_MODELS constant."""

    def test_is_frozenset(self):
        """CHAT_COMPLETIONS_ONLY_MODELS should be a frozenset (immutable)."""
        assert isinstance(CHAT_COMPLETIONS_ONLY_MODELS, frozenset)

    def test_contains_expected_models(self):
        """Should contain the legacy gpt-3.5-turbo models."""
        assert CHAT_COMPLETIONS_ONLY_MODELS == {
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-0125",
        }