Compare commits

...

2 Commits

Author: Otto
SHA1:   8769b55c51
Date:   2026-01-31 08:56:12 +00:00

fix: use MAX_RETRIES constant and invalidate cache before reload

Addresses CodeRabbit review feedback:
- Use the shared MAX_RETRIES constant instead of a hardcoded value
- Invalidate the session cache before reloading to ensure fresh data

Author: Otto
SHA1:   2e4ec66730
Date:   2026-01-31 07:24:37 +00:00

fix(copilot): include tools schema in LLM continuation after long-running tools

When _generate_llm_continuation() was called after long-running tools
(like create_agent) completed, it made LLM calls without the tools
schema. Some providers (especially Anthropic via OpenRouter) require
the tools schema when the conversation history contains tool_use/tool_result
blocks, causing validation errors and unresponsive chats.

Changes:
- Add tools=tools with tool_choice="none" so providers can validate
  tool interactions without making new tool calls
- Add retry logic with exponential backoff (matching _stream_chat_chunks)
- Improve error logging when retries are exhausted

Fixes chat becoming unresponsive after create_agent tool execution.
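The core of the second commit is easy to miss inside the diff below, so here is a minimal sketch of the failure mode and the fix, assuming an OpenAI-compatible AsyncOpenAI client pointed at OpenRouter. The create_agent tool definition, model slug, and message history are illustrative placeholders, not the project's real schema:

# Minimal sketch of the failure mode and the fix. The tool definition,
# model slug, and message history below are illustrative placeholders.
from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="https://openrouter.ai/api/v1", api_key="...")

tools = [{
    "type": "function",
    "function": {
        "name": "create_agent",  # hypothetical schema, for illustration only
        "description": "Create a new agent",
        "parameters": {"type": "object", "properties": {}},
    },
}]

messages = [
    {"role": "user", "content": "Create an agent for me."},
    # History produced by the earlier tool round-trip:
    {"role": "assistant", "tool_calls": [{
        "id": "call_1",
        "type": "function",
        "function": {"name": "create_agent", "arguments": "{}"},
    }]},
    {"role": "tool", "tool_call_id": "call_1", "content": '{"status": "created"}'},
]

async def continue_conversation() -> str | None:
    # Before the fix: omitting `tools` here made some providers (notably
    # Anthropic via OpenRouter) reject the tool_use/tool_result blocks in
    # the history. Passing the schema with tool_choice="none" lets the
    # provider validate the history without allowing new tool calls.
    response = await client.chat.completions.create(
        model="anthropic/claude-3.5-sonnet",  # any Anthropic model via OpenRouter
        messages=messages,
        tools=tools,
        tool_choice="none",
    )
    return response.choices[0].message.content

tool_choice="none" keeps the schema visible to the provider for validating the tool history while still guaranteeing a text-only response.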


@@ -1883,7 +1883,14 @@ async def _generate_llm_continuation(
     This is called by background tasks to continue the conversation
     after a tool result is saved. The response is saved to the database
     so users see it when they refresh or poll.
+
+    Includes retry logic with exponential backoff for transient API errors.
     """
+    retry_count = 0
+    max_retries = MAX_RETRIES
+    last_error: Exception | None = None
+
+    while retry_count <= max_retries:
         try:
             # Load fresh session from DB (bypass cache to get the updated tool result)
             await invalidate_session_cache(session_id)
@@ -1918,15 +1925,19 @@ async def _generate_llm_continuation(
             if session_id:
                 extra_body["session_id"] = session_id[:128]
 
-        # Make non-streaming LLM call (no tools - just text response)
             from typing import cast
 
             from openai.types.chat import ChatCompletionMessageParam
 
-        # No tools parameter = text-only response (no tool calls)
+            # Include tools with tool_choice="none" to allow the provider to validate
+            # tool interactions in the message history without allowing new tool calls.
+            # Some providers (especially Anthropic via OpenRouter) require the tools
+            # schema to be present when the conversation contains tool_use/tool_result.
             response = await client.chat.completions.create(
                 model=config.model,
                 messages=cast(list[ChatCompletionMessageParam], messages),
+                tools=tools,
+                tool_choice="none",
                 extra_body=extra_body,
             )
@@ -1934,7 +1945,9 @@ async def _generate_llm_continuation(
             assistant_content = response.choices[0].message.content
 
             # Reload session from DB to avoid race condition with user messages
-        # that may have been sent while we were generating the LLM response
+            # that may have been sent while we were generating the LLM response.
+            # Invalidate cache first to ensure we get fresh data from the database.
+            await invalidate_session_cache(session_id)
             fresh_session = await get_chat_session(session_id, user_id)
             if not fresh_session:
                 logger.error(
@@ -1959,8 +1972,38 @@ async def _generate_llm_continuation(
                     f"Generated LLM continuation for session {session_id}, "
                     f"response length: {len(assistant_content)}"
                 )
+                return  # Success - exit the retry loop
             else:
-            logger.warning(f"LLM continuation returned empty response for {session_id}")
+                logger.warning(
+                    f"LLM continuation returned empty response for {session_id}"
+                )
+                return  # Empty response is not retryable
         except Exception as e:
-        logger.error(f"Failed to generate LLM continuation: {e}", exc_info=True)
+            last_error = e
+            if _is_retryable_error(e) and retry_count < max_retries:
+                retry_count += 1
+                delay = min(
+                    BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
+                    MAX_DELAY_SECONDS,
+                )
+                logger.warning(
+                    f"Retryable error in LLM continuation for {session_id}: {e!s}. "
+                    f"Retrying in {delay:.1f}s (attempt {retry_count}/{max_retries})"
+                )
+                await asyncio.sleep(delay)
+                continue
+            else:
+                # Non-retryable error or max retries exceeded
+                logger.error(
+                    f"Failed to generate LLM continuation for {session_id}: {e!s}",
+                    exc_info=True,
+                )
+                break
+
+    # If we get here, all retries failed
+    if last_error:
+        logger.error(
+            f"LLM continuation failed after {retry_count} retries for {session_id}. "
+            f"Last error: {last_error!s}"
+        )
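The diff leans on four names defined elsewhere in the module: MAX_RETRIES (the shared constant from the first commit), BASE_DELAY_SECONDS, MAX_DELAY_SECONDS, and _is_retryable_error(). Their definitions are not shown in this compare view, so the following is only a plausible sketch with illustrative values; the real constants and the exact retryable-error test may differ:

# Hedged sketch of the helpers the diff assumes. The values and the
# retryable-error predicate are illustrative, not the project's actual code.
import openai

MAX_RETRIES = 3            # illustrative value
BASE_DELAY_SECONDS = 1.0   # illustrative value
MAX_DELAY_SECONDS = 30.0   # illustrative value

def _is_retryable_error(e: Exception) -> bool:
    # Transient API failures are worth retrying; auth or request-validation
    # errors are not. The real predicate may check different conditions.
    return isinstance(
        e,
        (
            openai.RateLimitError,
            openai.APITimeoutError,
            openai.APIConnectionError,
            openai.InternalServerError,
        ),
    )

With these illustrative values, delay = min(BASE_DELAY_SECONDS * (2 ** (retry_count - 1)), MAX_DELAY_SECONDS) yields 1.0s, 2.0s, and 4.0s across the three retries; the 30-second cap only comes into play at higher retry counts.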