Compare commits

...

2 Commits

Author: Otto
SHA1:   8769b55c51
Date:   2026-01-31 08:56:12 +00:00

fix: use MAX_RETRIES constant and invalidate cache before reload

Addresses CodeRabbit review feedback:
- Use the shared MAX_RETRIES constant instead of a hardcoded value
- Invalidate the session cache before reloading to ensure fresh data

Author: Otto
SHA1:   2e4ec66730
Date:   2026-01-31 07:24:37 +00:00

fix(copilot): include tools schema in LLM continuation after long-running tools

When _generate_llm_continuation() was called after long-running tools
(like create_agent) completed, it made LLM calls without the tools
schema. Some providers (especially Anthropic via OpenRouter) require
the tools schema when the conversation history contains tool_use/tool_result
blocks, causing validation errors and unresponsive chats.

Changes:
- Add tools=tools with tool_choice="none" so providers can validate
  tool interactions without making new tool calls
- Add retry logic with exponential backoff (matching _stream_chat_chunks)
- Improve error logging when retries are exhausted

Fixes chat becoming unresponsive after create_agent tool execution.
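The core of the second commit is easy to miss inside the diff below, so here is a minimal sketch of the failure mode and the fix, assuming an OpenAI-compatible AsyncOpenAI client pointed at OpenRouter. The create_agent tool definition, model slug, and message history are illustrative placeholders, not the project's real schema:

# Minimal sketch of the failure mode and the fix. The tool definition,
# model slug, and message history below are illustrative placeholders.
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1", api_key="...")

tools = [{
    "type": "function",
    "function": {
        "name": "create_agent",  # hypothetical schema, for illustration only
        "description": "Create a new agent",
        "parameters": {"type": "object", "properties": {}},
    },
}]

messages = [
    {"role": "user", "content": "Create an agent for me."},
    # History produced by the earlier tool round-trip:
    {"role": "assistant", "tool_calls": [{
        "id": "call_1",
        "type": "function",
        "function": {"name": "create_agent", "arguments": "{}"},
    }]},
    {"role": "tool", "tool_call_id": "call_1", "content": '{"status": "created"}'},
]

async def continue_conversation() -> str | None:
    # Before the fix: omitting `tools` here made some providers (notably
    # Anthropic via OpenRouter) reject the tool_use/tool_result blocks in
    # the history. Passing the schema with tool_choice="none" lets the
    # provider validate the history without allowing new tool calls.
    response = await client.chat.completions.create(
        model="anthropic/claude-3.5-sonnet",  # any Anthropic model via OpenRouter
        messages=messages,
        tools=tools,
        tool_choice="none",
    )
    return response.choices[0].message.content

tool_choice="none" keeps the schema visible to the provider for validating the tool history while still guaranteeing a text-only response.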


@@ -1883,7 +1883,14 @@ async def _generate_llm_continuation(
     This is called by background tasks to continue the conversation
     after a tool result is saved. The response is saved to the database
     so users see it when they refresh or poll.
+
+    Includes retry logic with exponential backoff for transient API errors.
     """
+    retry_count = 0
+    max_retries = MAX_RETRIES
+    last_error: Exception | None = None
+
+    while retry_count <= max_retries:
         try:
             # Load fresh session from DB (bypass cache to get the updated tool result)
             await invalidate_session_cache(session_id)
@@ -1918,15 +1925,19 @@ async def _generate_llm_continuation(
             if session_id:
                 extra_body["session_id"] = session_id[:128]
 
-        # Make non-streaming LLM call (no tools - just text response)
             from typing import cast
 
             from openai.types.chat import ChatCompletionMessageParam
 
-        # No tools parameter = text-only response (no tool calls)
+            # Include tools with tool_choice="none" to allow the provider to validate
+            # tool interactions in the message history without allowing new tool calls.
+            # Some providers (especially Anthropic via OpenRouter) require the tools
+            # schema to be present when the conversation contains tool_use/tool_result.
             response = await client.chat.completions.create(
                 model=config.model,
                 messages=cast(list[ChatCompletionMessageParam], messages),
+                tools=tools,
+                tool_choice="none",
                 extra_body=extra_body,
             )
@@ -1934,7 +1945,9 @@ async def _generate_llm_continuation(
             assistant_content = response.choices[0].message.content
 
             # Reload session from DB to avoid race condition with user messages
-        # that may have been sent while we were generating the LLM response
+            # that may have been sent while we were generating the LLM response.
+            # Invalidate cache first to ensure we get fresh data from the database.
+            await invalidate_session_cache(session_id)
             fresh_session = await get_chat_session(session_id, user_id)
             if not fresh_session:
                 logger.error(
@@ -1959,8 +1972,38 @@ async def _generate_llm_continuation(
                     f"Generated LLM continuation for session {session_id}, "
                     f"response length: {len(assistant_content)}"
                 )
+                return  # Success - exit the retry loop
             else:
-            logger.warning(f"LLM continuation returned empty response for {session_id}")
+                logger.warning(
+                    f"LLM continuation returned empty response for {session_id}"
+                )
+                return  # Empty response is not retryable
         except Exception as e:
-        logger.error(f"Failed to generate LLM continuation: {e}", exc_info=True)
+            last_error = e
+            if _is_retryable_error(e) and retry_count < max_retries:
+                retry_count += 1
+                delay = min(
+                    BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
+                    MAX_DELAY_SECONDS,
+                )
+                logger.warning(
+                    f"Retryable error in LLM continuation for {session_id}: {e!s}. "
+                    f"Retrying in {delay:.1f}s (attempt {retry_count}/{max_retries})"
+                )
+                await asyncio.sleep(delay)
+                continue
+            else:
+                # Non-retryable error or max retries exceeded
+                logger.error(
+                    f"Failed to generate LLM continuation for {session_id}: {e!s}",
+                    exc_info=True,
+                )
+                break
+
+    # If we get here, all retries failed
+    if last_error:
+        logger.error(
+            f"LLM continuation failed after {retry_count} retries for {session_id}. "
+            f"Last error: {last_error!s}"
+        )
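The diff leans on four names defined elsewhere in the module: MAX_RETRIES (the shared constant from the first commit), BASE_DELAY_SECONDS, MAX_DELAY_SECONDS, and _is_retryable_error(). Their definitions are not shown in this compare view, so the following is only a plausible sketch with illustrative values; the real constants and the exact retryable-error test may differ:

# Hedged sketch of the helpers the diff assumes. The values and the
# retryable-error predicate are illustrative, not the project's actual code.
import openai

MAX_RETRIES = 3            # illustrative value
BASE_DELAY_SECONDS = 1.0   # illustrative value
MAX_DELAY_SECONDS = 30.0   # illustrative value

def _is_retryable_error(e: Exception) -> bool:
    # Transient API failures are worth retrying; auth or request-validation
    # errors are not. The real predicate may check different conditions.
    return isinstance(
        e,
        (
            openai.RateLimitError,
            openai.APITimeoutError,
            openai.APIConnectionError,
            openai.InternalServerError,
        ),
    )

With these illustrative values, delay = min(BASE_DELAY_SECONDS * (2 ** (retry_count - 1)), MAX_DELAY_SECONDS) yields 1.0s, 2.0s, and 4.0s across the three retries; the 30-second cap only comes into play at higher retry counts.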