fix(backend/copilot): use shared get_openai_client() for transcript compaction

Replace per-call AsyncOpenAI instantiation with the process-cached
get_openai_client() from backend.util.clients, consistent with
optimize_blocks.py usage.
This commit is contained in:
Zamil Majdy
2026-03-14 11:08:07 +07:00
parent 5dc8d6c848
commit 41872e003b

View File

@@ -21,10 +21,8 @@ from dataclasses import dataclass
from pathlib import Path
from uuid import uuid4
import openai
from backend.copilot.config import ChatConfig
from backend.util import json
from backend.util.clients import get_openai_client
from backend.util.prompt import CompressResult, compress_context
logger = logging.getLogger(__name__)
@@ -705,20 +703,21 @@ def _messages_to_transcript(messages: list[dict]) -> str:
async def _run_compression(
    messages: list[dict],
    model: str,
    log_prefix: str,
) -> CompressResult:
    """Run LLM-based compression with truncation fallback.

    Uses the shared OpenAI client from ``get_openai_client()``.

    If no client is configured or the LLM call fails (API error, timeout,
    network), falls back to truncation-based compression which drops older
    messages without summarization.

    Args:
        messages: Chat messages to compress, as role/content dicts.
        model: Model identifier passed through to ``compress_context``.
        log_prefix: Prefix for log lines so callers can correlate output.

    Returns:
        The ``CompressResult`` produced by ``compress_context``.
    """
    client = get_openai_client()
    if client is None:
        # No credentials/config for the shared client: truncation-only path.
        logger.warning("%s No OpenAI client configured, using truncation", log_prefix)
        return await compress_context(messages=messages, model=model, client=None)
    try:
        return await compress_context(messages=messages, model=model, client=client)
    except Exception as e:
        # Best-effort: any LLM failure degrades to truncation rather than raising.
        logger.warning("%s LLM compaction failed, using truncation: %s", log_prefix, e)
        return await compress_context(messages=messages, model=model, client=None)
@@ -735,13 +734,15 @@ async def compact_transcript(
Returns the compacted JSONL string, or ``None`` on failure.
"""
from backend.copilot.config import ChatConfig
cfg = ChatConfig()
messages = _transcript_to_messages(content)
if len(messages) < 2:
logger.warning("%s Too few messages to compact (%d)", log_prefix, len(messages))
return None
try:
result = await _run_compression(messages, cfg.model, cfg, log_prefix)
result = await _run_compression(messages, cfg.model, log_prefix)
if not result.was_compacted:
# Compressor says it's within budget, but the SDK rejected it.
# Return None so the caller falls through to DB fallback.