fix(platform): address PR review comments

- transcript.py: properly flatten tool_result content blocks including nested list structures instead of losing tool_use_id silently - transcript_builder.py: make replace_entries atomic — parse into temp builder first, only swap on success - service.py: skip resume when compaction fails (avoid re-triggering "Prompt is too long"), wrap upload in try/except for best-effort - baseline/service.py: use floor of 0 instead of 1 for token estimates - rate_limit.py: broaden Redis exception handling to catch all errors (timeouts, etc.) for true fail-open behavior; remove unused import - helpers.py: return ErrorResponse on post-exec InsufficientBalanceError instead of swallowing; remove raw exception text from user message - helpers_test.py: update test to expect ErrorResponse - UsageLimits.tsx: remove dark:* classes (copilot has no dark mode yet)
2026-04-08 03:00:28 -04:00 · 2026-03-13 18:12:24 +07:00
parent 1704214394
commit 0200748225
8 changed files with 93 additions and 50 deletions
--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -438,10 +438,10 @@ async def stream_chat_completion_baseline(
            )

            turn_prompt_tokens = max(
-                estimate_token_count(openai_messages, model=config.model), 1
+                estimate_token_count(openai_messages, model=config.model), 0
            )
            turn_completion_tokens = max(
-                estimate_token_count_str(assistant_text, model=config.model), 1
+                estimate_token_count_str(assistant_text, model=config.model), 0
            )
            logger.info(
                "[Baseline] No streaming usage reported; estimated tokens: "
--- a/autogpt_platform/backend/backend/copilot/rate_limit.py
+++ b/autogpt_platform/backend/backend/copilot/rate_limit.py
@@ -11,7 +11,6 @@ import logging
 from datetime import UTC, datetime, timedelta

 from pydantic import BaseModel, Field
-from redis.exceptions import RedisError

 from backend.data.redis_client import get_redis_async

@@ -113,8 +112,10 @@ async def get_usage_status(
        )
        daily_used = int(daily_raw or 0)
        weekly_used = int(weekly_raw or 0)
-    except (RedisError, ConnectionError, OSError):
-        logger.warning("Redis unavailable for usage status, returning zeros")
+    except Exception:
+        logger.warning(
+            "Redis unavailable for usage status, returning zeros", exc_info=True
+        )

    return CoPilotUsageStatus(
        daily=UsageWindow(
@@ -154,8 +155,10 @@ async def check_rate_limit(
        )
        daily_used = int(daily_raw or 0)
        weekly_used = int(weekly_raw or 0)
-    except (RedisError, ConnectionError, OSError):
-        logger.warning("Redis unavailable for rate limit check, allowing request")
+    except Exception:
+        logger.warning(
+            "Redis unavailable for rate limit check, allowing request", exc_info=True
+        )
        return

    if daily_token_limit > 0 and daily_used >= daily_token_limit:
@@ -238,8 +241,9 @@ async def record_token_usage(
        pipe.expire(w_key, max(seconds_until_weekly_reset, 1))

        await pipe.execute()
-    except (RedisError, ConnectionError, OSError):
+    except Exception:
        logger.warning(
            "Redis unavailable for recording token usage (tokens=%d)",
            total,
+            exc_info=True,
        )
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -861,21 +861,41 @@ async def stream_chat_completion_sdk(
                            for line in transcript_content.splitlines()
                            if line.strip()
                        )
-                        # Upload the compacted version for future turns
-                        await upload_transcript(
-                            user_id=user_id or "",
-                            session_id=session_id,
-                            content=transcript_content,
-                            message_count=dl.message_count,
-                            log_prefix=log_prefix,
+                        # Best-effort upload of compacted version
+                        try:
+                            await upload_transcript(
+                                user_id=user_id or "",
+                                session_id=session_id,
+                                content=transcript_content,
+                                message_count=dl.message_count,
+                                log_prefix=log_prefix,
+                            )
+                        except Exception:
+                            logger.warning(
+                                "%s Failed to upload compacted transcript",
+                                log_prefix,
+                                exc_info=True,
+                            )
+                    else:
+                        # Compaction failed — skip resume to avoid
+                        # "Prompt is too long" on an oversized transcript.
+                        logger.warning(
+                            "%s Compaction failed, skipping resume for this turn",
+                            log_prefix,
                        )
+                        transcript_content = ""

-                # Load previous FULL context into builder
-                transcript_builder.load_previous(
-                    transcript_content, log_prefix=log_prefix
-                )
-                resume_file = write_transcript_to_tempfile(
-                    transcript_content, session_id, sdk_cwd
+                # Load previous context into builder (empty string is a no-op)
+                if transcript_content:
+                    transcript_builder.load_previous(
+                        transcript_content, log_prefix=log_prefix
+                    )
+                resume_file = (
+                    write_transcript_to_tempfile(
+                        transcript_content, session_id, sdk_cwd
+                    )
+                    if transcript_content
+                    else None
                )
                if resume_file:
                    use_resume = True
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript.py
@@ -490,13 +490,26 @@ def _transcript_to_messages(content: str) -> list[dict]:
                    parts.append(block)
            msg_dict["content"] = "\n".join(parts) if parts else ""
        elif isinstance(raw_content, list):
-            parts = []
+            str_parts: list[str] = []
            for block in raw_content:
                if isinstance(block, dict) and block.get("type") == "tool_result":
-                    parts.append(str(block.get("content", "")))
+                    # Flatten tool_result content for summarisation;
+                    # tool_use_id pairing is not preserved through LLM
+                    # compaction — the compacted transcript uses fresh IDs.
+                    inner = block.get("content", "")
+                    if isinstance(inner, list):
+                        for sub in inner:
+                            if isinstance(sub, dict):
+                                str_parts.append(str(sub.get("text", "")))
+                            else:
+                                str_parts.append(str(sub))
+                    else:
+                        str_parts.append(str(inner))
+                elif isinstance(block, dict) and block.get("type") == "text":
+                    str_parts.append(str(block.get("text", "")))
                elif isinstance(block, str):
-                    parts.append(block)
-            msg_dict["content"] = "\n".join(parts) if parts else ""
+                    str_parts.append(block)
+            msg_dict["content"] = "\n".join(str_parts) if str_parts else ""
        else:
            msg_dict["content"] = raw_content or ""
        messages.append(msg_dict)
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript_builder.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_builder.py
@@ -185,9 +185,18 @@ class TranscriptBuilder:
        pre-compaction history.
        """
        prev_count = len(self._entries)
-        self._entries.clear()
-        self._last_uuid = None
-        self.load_previous(content, log_prefix=log_prefix)
+        temp = TranscriptBuilder()
+        try:
+            temp.load_previous(content, log_prefix=log_prefix)
+        except Exception:
+            logger.exception(
+                "%s Failed to parse compacted transcript; keeping %d existing entries",
+                log_prefix,
+                prev_count,
+            )
+            return
+        self._entries = temp._entries
+        self._last_uuid = temp._last_uuid
        logger.info(
            "%s Replaced %d entries with %d compacted entries",
            log_prefix,
--- a/autogpt_platform/backend/backend/copilot/tools/helpers.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers.py
@@ -188,14 +188,18 @@ async def execute_block(
                    ),
                )
            except InsufficientBalanceError:
-                # Concurrent spend drained balance after our pre-check passed.
-                # Block already executed (with possible side effects), so return
-                # its output. Log the billing leak amount for reconciliation.
                logger.warning(
                    "Post-exec credit charge failed for block %s (cost=%d)",
                    block.name,
                    cost,
                )
+                return ErrorResponse(
+                    message=(
+                        f"Insufficient credits to complete '{block.name}'. "
+                        "Please top up your credits to continue."
+                    ),
+                    session_id=session_id,
+                )

        return BlockOutputResponse(
            message=f"Block '{block.name}' executed successfully",
@@ -216,7 +220,7 @@ async def execute_block(
    except Exception as e:
        logger.error("Unexpected error executing block: %s", e, exc_info=True)
        return ErrorResponse(
-            message=f"Failed to execute block: {e}",
+            message="Failed to execute block",
            error=str(e),
            session_id=session_id,
        )
--- a/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/helpers_test.py
@@ -150,8 +150,8 @@ class TestExecuteBlockCreditCharging:
        mock_get_credits.assert_not_awaited()
        mock_spend_credits.assert_not_awaited()

-    async def test_returns_output_on_post_exec_insufficient_balance(self):
-        """If charging fails after execution, output is still returned (block already ran)."""
+    async def test_returns_error_on_post_exec_insufficient_balance(self):
+        """If charging fails after execution, return ErrorResponse."""
        from backend.util.exceptions import InsufficientBalanceError

        block = _make_block()
@@ -185,9 +185,8 @@ class TestExecuteBlockCreditCharging:
                matched_credentials={},
            )

-        # Block already executed (with side effects), so output is returned
-        assert isinstance(result, BlockOutputResponse)
-        assert result.success is True
+        assert isinstance(result, ErrorResponse)
+        assert "Insufficient credits" in result.message


 # ---------------------------------------------------------------------------
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/UsageLimits/UsageLimits.tsx
@@ -55,17 +55,15 @@ function UsageBar({
  return (
    <div className="flex flex-col gap-1">
      <div className="flex items-baseline justify-between">
-        <span className="text-xs font-medium text-neutral-700 dark:text-neutral-300">
-          {label}
-        </span>
-        <span className="text-[11px] tabular-nums text-neutral-500 dark:text-neutral-400">
+        <span className="text-xs font-medium text-neutral-700">{label}</span>
+        <span className="text-[11px] tabular-nums text-neutral-500">
          {percentLabel}
        </span>
      </div>
-      <div className="text-[10px] text-neutral-400 dark:text-neutral-500">
+      <div className="text-[10px] text-neutral-400">
        Resets {formatResetTime(resetsAt)}
      </div>
-      <div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200 dark:bg-neutral-700">
+      <div className="h-2 w-full overflow-hidden rounded-full bg-neutral-200">
        <div
          className={`h-full rounded-full transition-[width] duration-300 ease-out ${
            isHigh ? "bg-orange-500" : "bg-blue-500"
@@ -89,17 +87,13 @@ export function UsagePanelContent({

  if (!hasDailyLimit && !hasWeeklyLimit) {
    return (
-      <div className="text-xs text-neutral-500 dark:text-neutral-400">
-        No usage limits configured
-      </div>
+      <div className="text-xs text-neutral-500">No usage limits configured</div>
    );
  }

  return (
    <div className="flex flex-col gap-3">
-      <div className="text-xs font-semibold text-neutral-800 dark:text-neutral-200">
-        Usage limits
-      </div>
+      <div className="text-xs font-semibold text-neutral-800">Usage limits</div>
      {hasDailyLimit && (
        <UsageBar
          label="Today"
@@ -119,7 +113,7 @@ export function UsagePanelContent({
      {showBillingLink && (
        <a
          href="/profile/credits"
-          className="text-[11px] text-blue-600 hover:underline dark:text-blue-400"
+          className="text-[11px] text-blue-600 hover:underline"
        >
          Learn more about usage limits
        </a>