Compare commits

...

2 Commits

Author SHA1 Message Date
Otto
8769b55c51 fix: use MAX_RETRIES constant and invalidate cache before reload
Addresses CodeRabbit review feedback:
- Use shared MAX_RETRIES constant instead of hardcoded value
- Invalidate session cache before reloading to ensure fresh data
2026-01-31 08:56:12 +00:00
Otto
2e4ec66730 fix(copilot): include tools schema in LLM continuation after long-running tools
When _generate_llm_continuation() is called after long-running tools
(like create_agent) complete, it was making LLM calls without the tools
schema. Some providers (especially Anthropic via OpenRouter) require
the tools schema when conversation history contains tool_use/tool_result
blocks, causing validation errors and unresponsive chats.

Changes:
- Add tools=tools with tool_choice="none" to allow providers to validate
  tool interactions without making new tool calls
- Add retry logic with exponential backoff (matching _stream_chat_chunks)
- Improve error logging for retry exhaustion

Fixes chat becoming unresponsive after create_agent tool execution.
2026-01-31 07:24:37 +00:00
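
For context on the validation failure the second commit describes: the error appears when the conversation history already contains a tool call and its result, but the follow-up request omits the tools schema. A minimal sketch of such a continuation request in OpenAI chat-completions format — the message contents and tool name are illustrative, and `client`, `config`, and `tools` are assumed to come from the module being patched:

```python
# Illustrative only: a continuation request whose history contains a prior
# tool call/result pair. Some providers reject this unless `tools` is present,
# so the fix sends the schema with tool_choice="none" to forbid new tool calls.
messages = [
    {"role": "user", "content": "Create an agent for me."},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": "call_1",
                "type": "function",
                "function": {"name": "create_agent", "arguments": "{}"},
            }
        ],
    },
    {"role": "tool", "tool_call_id": "call_1", "content": "Agent created."},
]

response = await client.chat.completions.create(
    model=config.model,
    messages=messages,
    tools=tools,          # schema must accompany tool_use/tool_result history
    tool_choice="none",   # but no new tool calls are allowed
)
```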

View File

@@ -1883,84 +1883,127 @@ async def _generate_llm_continuation(
     This is called by background tasks to continue the conversation
     after a tool result is saved. The response is saved to the database
     so users see it when they refresh or poll.
+
+    Includes retry logic with exponential backoff for transient API errors.
     """
-    try:
-        # Load fresh session from DB (bypass cache to get the updated tool result)
-        await invalidate_session_cache(session_id)
-        session = await get_chat_session(session_id, user_id)
-        if not session:
-            logger.error(f"Session {session_id} not found for LLM continuation")
-            return
-
-        # Build system prompt
-        system_prompt, _ = await _build_system_prompt(user_id)
-
-        # Build messages in OpenAI format
-        messages = session.to_openai_messages()
-        if system_prompt:
-            from openai.types.chat import ChatCompletionSystemMessageParam
-            system_message = ChatCompletionSystemMessageParam(
-                role="system",
-                content=system_prompt,
-            )
-            messages = [system_message] + messages
-
-        # Build extra_body for tracing
-        extra_body: dict[str, Any] = {
-            "posthogProperties": {
-                "environment": settings.config.app_env.value,
-            },
-        }
-        if user_id:
-            extra_body["user"] = user_id[:128]
-            extra_body["posthogDistinctId"] = user_id
-        if session_id:
-            extra_body["session_id"] = session_id[:128]
-
-        # Make non-streaming LLM call (no tools - just text response)
-        from typing import cast
-        from openai.types.chat import ChatCompletionMessageParam
-
-        # No tools parameter = text-only response (no tool calls)
-        response = await client.chat.completions.create(
-            model=config.model,
-            messages=cast(list[ChatCompletionMessageParam], messages),
-            extra_body=extra_body,
-        )
-
-        if response.choices and response.choices[0].message.content:
-            assistant_content = response.choices[0].message.content
-
-            # Reload session from DB to avoid race condition with user messages
-            # that may have been sent while we were generating the LLM response
-            fresh_session = await get_chat_session(session_id, user_id)
-            if not fresh_session:
-                logger.error(
-                    f"Session {session_id} disappeared during LLM continuation"
-                )
-                return
-
-            # Save assistant message to database
-            assistant_message = ChatMessage(
-                role="assistant",
-                content=assistant_content,
-            )
-            fresh_session.messages.append(assistant_message)
-
-            # Save to database (not cache) to persist the response
-            await upsert_chat_session(fresh_session)
-
-            # Invalidate cache so next poll/refresh gets fresh data
-            await invalidate_session_cache(session_id)
-
-            logger.info(
-                f"Generated LLM continuation for session {session_id}, "
-                f"response length: {len(assistant_content)}"
-            )
-        else:
-            logger.warning(f"LLM continuation returned empty response for {session_id}")
-    except Exception as e:
-        logger.error(f"Failed to generate LLM continuation: {e}", exc_info=True)
+    retry_count = 0
+    max_retries = MAX_RETRIES
+    last_error: Exception | None = None
+
+    while retry_count <= max_retries:
+        try:
+            # Load fresh session from DB (bypass cache to get the updated tool result)
+            await invalidate_session_cache(session_id)
+            session = await get_chat_session(session_id, user_id)
+            if not session:
+                logger.error(f"Session {session_id} not found for LLM continuation")
+                return
+
+            # Build system prompt
+            system_prompt, _ = await _build_system_prompt(user_id)
+
+            # Build messages in OpenAI format
+            messages = session.to_openai_messages()
+            if system_prompt:
+                from openai.types.chat import ChatCompletionSystemMessageParam
+                system_message = ChatCompletionSystemMessageParam(
+                    role="system",
+                    content=system_prompt,
+                )
+                messages = [system_message] + messages
+
+            # Build extra_body for tracing
+            extra_body: dict[str, Any] = {
+                "posthogProperties": {
+                    "environment": settings.config.app_env.value,
+                },
+            }
+            if user_id:
+                extra_body["user"] = user_id[:128]
+                extra_body["posthogDistinctId"] = user_id
+            if session_id:
+                extra_body["session_id"] = session_id[:128]
+
+            from typing import cast
+            from openai.types.chat import ChatCompletionMessageParam
+
+            # Include tools with tool_choice="none" to allow the provider to validate
+            # tool interactions in the message history without allowing new tool calls.
+            # Some providers (especially Anthropic via OpenRouter) require the tools
+            # schema to be present when the conversation contains tool_use/tool_result.
+            response = await client.chat.completions.create(
+                model=config.model,
+                messages=cast(list[ChatCompletionMessageParam], messages),
+                tools=tools,
+                tool_choice="none",
+                extra_body=extra_body,
+            )
+
+            if response.choices and response.choices[0].message.content:
+                assistant_content = response.choices[0].message.content
+
+                # Reload session from DB to avoid race condition with user messages
+                # that may have been sent while we were generating the LLM response.
+                # Invalidate cache first to ensure we get fresh data from the database.
+                await invalidate_session_cache(session_id)
+                fresh_session = await get_chat_session(session_id, user_id)
+                if not fresh_session:
+                    logger.error(
+                        f"Session {session_id} disappeared during LLM continuation"
+                    )
+                    return
+
+                # Save assistant message to database
+                assistant_message = ChatMessage(
+                    role="assistant",
+                    content=assistant_content,
+                )
+                fresh_session.messages.append(assistant_message)
+
+                # Save to database (not cache) to persist the response
+                await upsert_chat_session(fresh_session)
+
+                # Invalidate cache so next poll/refresh gets fresh data
+                await invalidate_session_cache(session_id)
+
+                logger.info(
+                    f"Generated LLM continuation for session {session_id}, "
+                    f"response length: {len(assistant_content)}"
+                )
+                return  # Success - exit the retry loop
+            else:
+                logger.warning(
+                    f"LLM continuation returned empty response for {session_id}"
+                )
+                return  # Empty response is not retryable
+        except Exception as e:
+            last_error = e
+            if _is_retryable_error(e) and retry_count < max_retries:
+                retry_count += 1
+                delay = min(
+                    BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
+                    MAX_DELAY_SECONDS,
+                )
+                logger.warning(
+                    f"Retryable error in LLM continuation for {session_id}: {e!s}. "
+                    f"Retrying in {delay:.1f}s (attempt {retry_count}/{max_retries})"
+                )
+                await asyncio.sleep(delay)
+                continue
+            else:
+                # Non-retryable error or max retries exceeded
+                logger.error(
+                    f"Failed to generate LLM continuation for {session_id}: {e!s}",
+                    exc_info=True,
+                )
+                break
+
+    # If we get here, all retries failed
+    if last_error:
+        logger.error(
+            f"LLM continuation failed after {retry_count} retries for {session_id}. "
+            f"Last error: {last_error!s}"
+        )
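
The hunk references MAX_RETRIES, BASE_DELAY_SECONDS, MAX_DELAY_SECONDS, and _is_retryable_error without showing their definitions (the commit message says the retry logic matches _stream_chat_chunks). A rough sketch of what that shared retry configuration might look like — the values and the error classification below are assumptions, not taken from this diff:

```python
import openai

# Hypothetical shared retry settings; the real values live elsewhere in the module.
MAX_RETRIES = 3
BASE_DELAY_SECONDS = 1.0
MAX_DELAY_SECONDS = 30.0


def _is_retryable_error(error: Exception) -> bool:
    """Treat rate limits, timeouts, connection drops, and 5xx responses as transient."""
    if isinstance(
        error,
        (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError),
    ):
        return True
    if isinstance(error, openai.APIStatusError):
        return error.status_code >= 500
    return False
```

With the assumed values above, the loop in the hunk would back off at roughly 1s, 2s, and 4s (capped at 30s) before logging the final failure.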