fix: preserve non-dict error bodies as truncated strings

Addresses CodeRabbit feedback: non-dict error bodies (e.g., HTML error pages from proxies) were silently discarded, losing diagnostic info. Now returns truncated string representation instead of None.
fix: use getattr for pyright type checking on dynamic error attributes
2026-02-09 22:35:54 -05:00 · 2026-02-09 07:53:44 +00:00 · 2026-02-09 07:52:45 +00:00 · 2026-02-09 07:42:06 +00:00 · 2026-02-09 07:41:02 +00:00 · 2026-02-03 12:36:54 +00:00
2 changed files with 121 additions and 15 deletions
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -1090,6 +1090,7 @@ async def _stream_chat_chunks(
                return
            except Exception as e:
                last_error = e
+
                if _is_retryable_error(e) and retry_count < MAX_RETRIES:
                    retry_count += 1
                    # Calculate delay with exponential backoff
@@ -1105,12 +1106,26 @@ async def _stream_chat_chunks(
                    continue  # Retry the stream
                else:
                    # Non-retryable error or max retries exceeded
-                    logger.error(
-                        f"Error in stream (not retrying): {e!s}",
-                        exc_info=True,
+                    _log_api_error(
+                        error=e,
+                        session_id=session.session_id if session else None,
+                        message_count=len(messages) if messages else None,
+                        model=model,
+                        retry_count=retry_count,
                    )
                    error_code = None
                    error_text = str(e)
+
+                    error_details = _extract_api_error_details(e)
+                    if error_details.get("response_body"):
+                        body = error_details["response_body"]
+                        if isinstance(body, dict):
+                            err = body.get("error")
+                            if isinstance(err, dict) and err.get("message"):
+                                error_text = err["message"]
+                            elif body.get("message"):
+                                error_text = body["message"]
+
                    if _is_region_blocked_error(e):
                        error_code = "MODEL_NOT_AVAILABLE_REGION"
                        error_text = (
@@ -1127,9 +1142,12 @@ async def _stream_chat_chunks(

        # If we exit the retry loop without returning, it means we exhausted retries
        if last_error:
-            logger.error(
-                f"Max retries ({MAX_RETRIES}) exceeded. Last error: {last_error!s}",
-                exc_info=True,
+            _log_api_error(
+                error=last_error,
+                session_id=session.session_id if session else None,
+                message_count=len(messages) if messages else None,
+                model=model,
+                retry_count=MAX_RETRIES,
            )
            yield StreamError(errorText=f"Max retries exceeded: {last_error!s}")
            yield StreamFinish()
@@ -1701,6 +1719,7 @@ async def _generate_llm_continuation(
                break  # Success, exit retry loop
            except Exception as e:
                last_error = e
+
                if _is_retryable_error(e) and retry_count < MAX_RETRIES:
                    retry_count += 1
                    delay = min(
@@ -1714,17 +1733,23 @@ async def _generate_llm_continuation(
                    await asyncio.sleep(delay)
                    continue
                else:
-                    # Non-retryable error - log and exit gracefully
-                    logger.error(
-                        f"Non-retryable error in LLM continuation: {e!s}",
-                        exc_info=True,
+                    # Non-retryable error - log details and exit gracefully
+                    _log_api_error(
+                        error=e,
+                        session_id=session_id,
+                        message_count=len(messages) if messages else None,
+                        model=config.model,
+                        retry_count=retry_count,
                    )
                    return

        if last_error:
-            logger.error(
-                f"Max retries ({MAX_RETRIES}) exceeded for LLM continuation. "
-                f"Last error: {last_error!s}"
+            _log_api_error(
+                error=last_error,
+                session_id=session_id,
+                message_count=len(messages) if messages else None,
+                model=config.model,
+                retry_count=MAX_RETRIES,
            )
            return

@@ -1764,6 +1789,89 @@ async def _generate_llm_continuation(
        logger.error(f"Failed to generate LLM continuation: {e}", exc_info=True)


+def _log_api_error(
+    error: Exception,
+    session_id: str | None = None,
+    message_count: int | None = None,
+    model: str | None = None,
+    retry_count: int = 0,
+) -> None:
+    """Log an API error together with the request context it occurred in.
+
+    Rate-limit and connection errors are logged at WARNING level. 5xx
+    responses and every other error are logged at ERROR level with the
+    exception's traceback attached.
+
+    Args:
+        error: The exception raised while calling the API.
+        session_id: Identifier of the chat session, if known.
+        message_count: Number of messages in the request, if known.
+        model: Name of the model the request targeted, if known.
+        retry_count: Number of retries attempted so far.
+    """
+    details = _extract_api_error_details(error)
+    # Keyword order is preserved, so the rendered dict keeps a stable layout.
+    details.update(
+        session_id=session_id,
+        message_count=message_count,
+        model=model,
+        retry_count=retry_count,
+    )
+
+    if isinstance(error, RateLimitError):
+        logger.warning("Rate limit error: %s", details)
+    elif isinstance(error, APIConnectionError):
+        logger.warning("API connection error: %s", details)
+    elif isinstance(error, APIStatusError) and error.status_code >= 500:
+        # The details dict carries the message but not where it was raised;
+        # attach the traceback explicitly so ERROR entries stay debuggable.
+        logger.error("API server error (5xx): %s", details, exc_info=error)
+    else:
+        logger.error("API error: %s", details, exc_info=error)
+
+
+def _extract_api_error_details(error: Exception) -> dict[str, Any]:
+    """Collect loggable diagnostics from an OpenAI/OpenRouter API exception.
+
+    Every exception yields its type name and a message capped at 500
+    characters, plus ``code`` / ``param`` when the exception has those
+    attributes. For ``APIStatusError`` the status code, request id, sanitized
+    response body and a fixed set of response headers are added as well.
+
+    Args:
+        error: The exception to inspect.
+
+    Returns:
+        A dict of diagnostic fields describing the error.
+    """
+    message = str(error)
+    if len(message) > 500:
+        message = message[:500] + "..."
+
+    details: dict[str, Any] = {
+        "error_type": type(error).__name__,
+        "error_message": message,
+    }
+
+    # Only record these keys when the exception actually defines them.
+    for attr in ("code", "param"):
+        if hasattr(error, attr):
+            details[attr] = getattr(error, attr, None)
+
+    if not isinstance(error, APIStatusError):
+        return details
+
+    details["status_code"] = error.status_code
+    details["request_id"] = getattr(error, "request_id", None)
+
+    body = getattr(error, "body", None)
+    if body:
+        details["response_body"] = _sanitize_error_body(body)
+
+    response = getattr(error, "response", None)
+    if response:
+        headers = response.headers
+        header_fields = (
+            ("openrouter_provider", "x-openrouter-provider"),
+            ("openrouter_model", "x-openrouter-model"),
+            ("retry_after", "retry-after"),
+            ("rate_limit_remaining", "x-ratelimit-remaining"),
+        )
+        for key, header_name in header_fields:
+            details[key] = headers.get(header_name)
+
+    return details
+
+
+def _sanitize_error_body(
+    body: Any, max_length: int = 2000
+) -> dict[str, Any] | str | None:
+    """Reduce an error response body to a bounded, whitelisted form for logging.
+
+    Args:
+        body: The response body. Dicts are filtered down to the whitelisted
+            keys; any other non-None value (e.g. an HTML error page) is
+            converted with ``str()``.
+        max_length: Maximum number of characters kept from any string before
+            ``"...[truncated]"`` is appended.
+
+    Returns:
+        A dict holding only the whitelisted keys, a (possibly truncated)
+        string for non-dict bodies, or ``None`` when there is nothing to
+        report (``body`` is None or no whitelisted key yields a value).
+    """
+
+    def _truncate(text: str) -> str:
+        if len(text) > max_length:
+            return text[:max_length] + "...[truncated]"
+        return text
+
+    if body is None:
+        return None
+    if not isinstance(body, dict):
+        # Non-dict bodies (e.g., HTML error pages) - keep a truncated string
+        return _truncate(str(body))
+
+    safe_fields = ("message", "type", "code", "param", "error")
+    sanitized: dict[str, Any] = {}
+
+    for field in safe_fields:
+        if field not in body:
+            continue
+        value = body[field]
+        if isinstance(value, dict):
+            # Nested objects go through the same whitelist. Drop the key when
+            # nothing survives so the result never looks like {"error": None}.
+            nested = _sanitize_error_body(value, max_length)
+            if nested is not None:
+                sanitized[field] = nested
+        elif isinstance(value, str):
+            sanitized[field] = _truncate(value)
+        elif value is None or isinstance(value, (bool, int, float)):
+            sanitized[field] = value
+        else:
+            # Lists and other containers would otherwise bypass the length
+            # cap; log a bounded string form instead.
+            sanitized[field] = _truncate(str(value))
+
+    return sanitized or None
+
+
 async def _generate_llm_continuation_with_streaming(
    session_id: str,
    user_id: str | None,
--- a/autogpt_platform/backend/backend/data/execution_queue_test.py
+++ b/autogpt_platform/backend/backend/data/execution_queue_test.py
@@ -3,8 +3,6 @@
 import queue
 import threading

-import pytest
-
 from backend.data.execution import ExecutionQueue
Author	SHA1	Message	Date
Otto	62c9e840b8	fix: preserve non-dict error bodies as truncated strings Addresses CodeRabbit feedback: non-dict error bodies (e.g., HTML error pages from proxies) were silently discarded, losing diagnostic info. Now returns truncated string representation instead of None.	2026-02-09 07:53:44 +00:00
Otto	495d01b09b	fix: use getattr for pyright type checking on dynamic error attributes Fixes pyright errors: - Cannot access attribute 'code' for class 'Exception' - Cannot access attribute 'param' for class 'Exception' Using getattr() instead of direct attribute access satisfies pyright's type checker while maintaining the same runtime behavior.	2026-02-09 07:52:45 +00:00
Otto	b334f1a843	fix: defensive error body extraction to handle None and non-dict cases Addresses review feedback: body.get('error', {}).get('message') is unsafe when body['error'] is None or not a dict. Now properly checks isinstance() before accessing nested fields, and falls back to top-level message field.	2026-02-09 07:42:06 +00:00
Otto	5fd1482944	Merge branch 'dev' into fix/copilot-error-logging Resolved conflict in service.py by keeping both: - Error logging functions (_log_api_error, _extract_api_error_details, _sanitize_error_body) - Streaming continuation function (_generate_llm_continuation_with_streaming)	2026-02-09 07:41:02 +00:00
Otto	efd1e96235	feat(copilot): Add detailed API error logging for debugging Adds comprehensive error logging for OpenRouter/OpenAI API errors to help diagnose issues like provider routing failures, context length exceeded, rate limits, etc. Changes: - Add _extract_api_error_details() to extract rich info from API errors including status code, response body, OpenRouter headers, etc. - Add _log_api_error() helper to log errors with context (session ID, message count, model, retry count) - Update error handling in _stream_chat_chunks() to use new logging - Update error handling in _generate_llm_continuation() to use new logging - Extract provider's error message from response body for better user feedback This helps debug issues like SECRT-1859 where OpenRouter returns 'provider returned error' with provider_name='unknown' without capturing the actual error details. Refs: SECRT-1859	2026-02-03 12:36:54 +00:00