fix(backend): use OpenRouter client for simulator to support non-OpenAI models (#12656)

## Why

Dry-run block simulation is failing in production with `404 - model
gemini-2.5-flash does not exist`. The simulator's default model
(`google/gemini-2.5-flash`) is a non-OpenAI model that requires
OpenRouter routing, but the shared `get_openai_client()` prefers the
direct OpenAI key, creating a client that can't handle non-OpenAI
models. The old code also stripped the provider prefix, sending
`gemini-2.5-flash` to OpenAI's API.

## What

- Added `prefer_openrouter` keyword parameter to `get_openai_client()` —
when True, prefers the OpenRouter key (returns None if unavailable,
rather than falling back to an incompatible direct OpenAI client)
- Simulator now calls `get_openai_client(prefer_openrouter=True)` so
`google/gemini-2.5-flash` routes correctly through OpenRouter
- Removed the redundant `SIMULATION_MODEL` env var override and the
now-unnecessary provider prefix stripping from `_simulator_model()`

## How

`get_openai_client()` is decorated with `@cached(ttl_seconds=3600)`
which keys by args, so `get_openai_client()` and
`get_openai_client(prefer_openrouter=True)` are cached independently.
When `prefer_openrouter=True` and no OpenRouter key exists, returns
`None` instead of falling back — the simulator already handles `None`
with a clear error message.

### Checklist
- [x] All 24 dry-run tests pass
- [x] Test asserts `get_openai_client` is called with
`prefer_openrouter=True`
- [x] Format, lint, and pyright pass
- [x] No changes to user-facing APIs
- [ ] Deploy to staging and verify simulation works

---------

Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
Zamil Majdy
2026-04-03 13:19:09 +02:00
committed by GitHub
parent 86abfbd394
commit 92b395d82a
4 changed files with 97 additions and 41 deletions

View File

@@ -90,11 +90,12 @@ async def test_simulate_block_basic():
with patch(
"backend.executor.simulator.get_openai_client", return_value=mock_client
):
) as mock_get_client:
outputs = []
async for name, data in simulate_block(mock_block, {"query": "test"}):
outputs.append((name, data))
mock_get_client.assert_called_once_with(prefer_openrouter=True)
assert ("result", "simulated output") in outputs
# Empty error pin should NOT be yielded — the simulator omits empty values
assert ("error", "") not in outputs

View File

@@ -31,7 +31,6 @@ Inspired by https://github.com/Significant-Gravitas/agent-simulator
import inspect
import json
import logging
import os
from collections.abc import AsyncGenerator
from typing import Any
@@ -44,39 +43,18 @@ logger = logging.getLogger(__name__)
# Default simulator model — Gemini 2.5 Flash via OpenRouter (fast, cheap, good at
# JSON generation). Configurable via SIMULATION_MODEL env var or
# ChatConfig.simulation_model.
# JSON generation). Configurable via ChatConfig.simulation_model
# (CHAT_SIMULATION_MODEL env var).
_DEFAULT_SIMULATOR_MODEL = "google/gemini-2.5-flash"
def _simulator_model() -> str:
# 1. Environment variable override (highest priority).
env_model = os.environ.get("SIMULATION_MODEL")
if env_model:
model = env_model
else:
# 2. ChatConfig.simulation_model (falls back to default).
try:
from backend.copilot.config import ChatConfig # noqa: PLC0415
model = ChatConfig().simulation_model or _DEFAULT_SIMULATOR_MODEL
except Exception:
model = _DEFAULT_SIMULATOR_MODEL
# get_openai_client() may return a direct OpenAI client (not OpenRouter).
# Direct OpenAI expects bare model names ("gpt-4o-mini"), not the
# OpenRouter-prefixed form ("openai/gpt-4o-mini"). Strip the prefix when
# the internal OpenAI key is configured (i.e. not going through OpenRouter).
try:
from backend.util.settings import Settings # noqa: PLC0415
from backend.copilot.config import ChatConfig # noqa: PLC0415
secrets = Settings().secrets
if secrets.openai_internal_api_key and "/" in model:
model = model.split("/", 1)[1]
return ChatConfig().simulation_model or _DEFAULT_SIMULATOR_MODEL
except Exception:
pass
return model
return _DEFAULT_SIMULATOR_MODEL
_TEMPERATURE = 0.2
@@ -136,7 +114,7 @@ async def _call_llm_for_simulation(
RuntimeError: If no LLM client is available.
ValueError: If all retry attempts are exhausted.
"""
client = get_openai_client()
client = get_openai_client(prefer_openrouter=True)
if client is None:
raise RuntimeError(
"[SIMULATOR ERROR — NOT A BLOCK FAILURE] No LLM client available "

View File

@@ -163,23 +163,31 @@ async def get_async_supabase() -> "AClient":
@cached(ttl_seconds=3600)
def get_openai_client(*, prefer_openrouter: bool = False) -> "AsyncOpenAI | None":
    """
    Get a process-cached async OpenAI client.

    By default prefers openai_internal_api_key (direct OpenAI) and falls back
    to open_router_api_key via OpenRouter's OpenAI-compatible endpoint.

    When ``prefer_openrouter=True``, returns an OpenRouter client or None —
    does **not** fall back to direct OpenAI (which can't route non-OpenAI
    models like ``google/gemini-2.5-flash``).

    Note: @cached keys by call arguments, so the default and
    prefer_openrouter=True variants are cached independently.

    Returns:
        An AsyncOpenAI client, or None when no suitable API key is configured.
    """
    from openai import AsyncOpenAI

    openai_key = settings.secrets.openai_internal_api_key
    openrouter_key = settings.secrets.open_router_api_key

    if prefer_openrouter:
        # OpenRouter-only path: never fall back to a direct OpenAI client,
        # which would 404 on provider-prefixed / non-OpenAI model names.
        if openrouter_key:
            return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return None

    # Default path: direct OpenAI first, OpenRouter as fallback.
    if openai_key:
        return AsyncOpenAI(api_key=openai_key)
    if openrouter_key:
        return AsyncOpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
    return None

View File

@@ -0,0 +1,69 @@
"""Tests for get_openai_client prefer_openrouter parameter."""
from unittest.mock import MagicMock, patch
import pytest
from backend.util.clients import get_openai_client
@pytest.fixture(autouse=True)
def _clear_client_cache():
    """Clear the @cached singleton before and after every test.

    get_openai_client is process-cached; without clearing, a client built by
    one test (with its patched settings) would leak into the next.
    """
    get_openai_client.cache_clear()
    yield
    get_openai_client.cache_clear()
def _mock_secrets(*, openai_key: str = "", openrouter_key: str = "") -> MagicMock:
secrets = MagicMock()
secrets.openai_internal_api_key = openai_key
secrets.open_router_api_key = openrouter_key
return secrets
class TestGetOpenaiClientDefault:
    """Default behavior: prefer direct OpenAI, fall back to OpenRouter."""

    def test_prefers_openai_key(self):
        # With both keys configured, the direct OpenAI key wins.
        secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            client = get_openai_client()
        assert client is not None
        assert client.api_key == "sk-openai"
        # Direct client must not point at the OpenRouter endpoint.
        assert "openrouter" not in str(client.base_url or "")

    def test_falls_back_to_openrouter(self):
        # Only the OpenRouter key is configured — fall back to it.
        secrets = _mock_secrets(openrouter_key="sk-or")
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            client = get_openai_client()
        assert client is not None
        assert client.api_key == "sk-or"

    def test_returns_none_when_no_keys(self):
        # Neither key configured — factory returns None, not a broken client.
        secrets = _mock_secrets()
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            assert get_openai_client() is None
class TestGetOpenaiClientPreferOpenrouter:
    """prefer_openrouter=True: OpenRouter client or None — never direct OpenAI."""

    def test_returns_openrouter_client(self):
        # Even with a direct OpenAI key available, OpenRouter is used.
        secrets = _mock_secrets(openai_key="sk-openai", openrouter_key="sk-or")
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            client = get_openai_client(prefer_openrouter=True)
        assert client is not None
        assert client.api_key == "sk-or"

    def test_returns_none_without_openrouter_key(self):
        # No fallback to direct OpenAI: a direct client can't route
        # non-OpenAI models, so None is the correct answer here.
        secrets = _mock_secrets(openai_key="sk-openai")
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            assert get_openai_client(prefer_openrouter=True) is None

    def test_returns_none_when_no_keys(self):
        # No keys at all — still None.
        secrets = _mock_secrets()
        with patch("backend.util.clients.settings") as mock_settings:
            mock_settings.secrets = secrets
            assert get_openai_client(prefer_openrouter=True) is None