fix(copilot): address review comments on pending-messages PR

- Use _pre_drain_msg_count for the transcript-load gate (the len > 1
  check) to avoid a spurious transcript load on a first turn that has
  pending messages (see the gating sketch below)
- Use _pre_drain_msg_count for the Graphiti warm-context gate so that
  draining pending messages at turn start no longer skips warm context
  on an actual first turn
- Add context.url/content length validators to QueuePendingMessageRequest
  to prevent LLM context-window stuffing (2K url, 32K content caps)
- Rename underscore-prefixed but actively-used variables (_pm, _content,
  _pt) to conventional names (pm, content, pt); a leading underscore
  conventionally marks a name as unused or private in Python
Author: majdyz
Date:   2026-04-11 00:00:07 +07:00
Parent: 39e89b50a7
Commit: a7d97dacf3

3 changed files with 38 additions and 11 deletions
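The first two bullets both hinge on when the message count is sampled.
Below is a minimal sketch of the gating idea, assuming _pre_drain_msg_count
is a snapshot taken before drained pending messages are appended. Only
_pre_drain_msg_count, session.messages, drained_at_start, ChatMessage, and
format_pending_as_user_message come from the diff; the surrounding ordering
and the gate variable names are assumptions:

    # Sketch, not the real code path: snapshot the count before draining.
    _pre_drain_msg_count = len(session.messages)
    for pm in drained_at_start:  # atomically popped from Redis
        session.messages.append(
            ChatMessage(
                role="user",
                content=format_pending_as_user_message(pm)["content"],
            )
        )

    # After the drain, len(session.messages) > 1 even on a true first
    # turn, so first-turn gates must compare against the snapshot:
    should_load_transcript = user_id and _pre_drain_msg_count > 1
    is_first_turn_for_warm_ctx = _pre_drain_msg_count <= 1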


@@ -142,6 +142,29 @@ class QueuePendingMessageRequest(BaseModel):
     )
     file_ids: list[str] | None = Field(default=None, max_length=20)
 
+    @field_validator("context")
+    @classmethod
+    def _validate_context_length(
+        cls, v: dict[str, str] | None
+    ) -> dict[str, str] | None:
+        if v is None:
+            return v
+        # Cap context values to prevent LLM context-window stuffing via
+        # large page payloads (url: 2 KB, content: 32 KB).
+        _URL_LIMIT = 2_000
+        _CONTENT_LIMIT = 32_000
+        url = v.get("url", "")
+        if len(url) > _URL_LIMIT:
+            raise ValueError(
+                f"context.url exceeds maximum length of {_URL_LIMIT} characters"
+            )
+        content = v.get("content", "")
+        if len(content) > _CONTENT_LIMIT:
+            raise ValueError(
+                f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters"
+            )
+        return v
+
 
 class QueuePendingMessageResponse(BaseModel):
     """Response for the pending-message endpoint.
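A quick usage sketch of the new validator, assuming
QueuePendingMessageRequest is a regular Pydantic v2 model and that
context is optional as shown (any other required fields are omitted
here for brevity):

    # Hypothetical usage; only the "context" field is taken from the diff.
    from pydantic import ValidationError

    QueuePendingMessageRequest(context={"url": "https://example.com", "content": "ok"})

    try:
        QueuePendingMessageRequest(context={"url": "x" * 2_001, "content": ""})
    except ValidationError as exc:
        # Pydantic surfaces the validator's ValueError, e.g.
        # "context.url exceeds maximum length of 2000 characters"
        print(exc)

Raising ValueError inside a field_validator is the standard Pydantic
pattern; the framework converts it into a ValidationError at model
construction time.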

@@ -950,12 +950,12 @@ async def stream_chat_completion_baseline(
             len(drained_at_start),
             session_id,
         )
-        for _pm in drained_at_start:
-            _content = format_pending_as_user_message(_pm)["content"]
+        for pm in drained_at_start:
+            content = format_pending_as_user_message(pm)["content"]
             # Append directly — pending messages are atomically-popped from
             # Redis and are never stale-cache duplicates, so the
             # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_content))
+            session.messages.append(ChatMessage(role="user", content=content))
 
         session = await upsert_chat_session(session)
 
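The "append directly" comment is the crux of this hunk. A dedup helper
like maybe_append_user_message presumably drops a message whose content
matches the current tail; its real signature is not shown here, so this
shape is a guess:

    # Hypothetical shape of the dedup helper, for contrast only.
    def maybe_append_user_message(session, content: str) -> None:
        last = session.messages[-1] if session.messages else None
        if last and last.role == "user" and last.content == content:
            return  # assumed stale-cache duplicate: drop it
        session.messages.append(ChatMessage(role="user", content=content))

An atomic Redis pop hands each pending message to exactly one consumer,
so two identical pending messages are two genuine user inputs, and the
dedup path would silently lose one of them.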
@@ -999,8 +999,10 @@ async def stream_chat_completion_baseline(
     prompt_task = _build_cacheable_system_prompt(None)
 
     # Run download + prompt build concurrently — both are independent I/O
-    # on the request critical path.
-    if user_id and len(session.messages) > 1:
+    # on the request critical path. Use the pre-drain count so pending
+    # messages drained at turn start don't spuriously trigger a transcript
+    # load on an actual first turn.
+    if user_id and _pre_drain_msg_count > 1:
         transcript_covers_prefix, (base_system_prompt, understanding) = (
             await asyncio.gather(
                 _load_prior_transcript(
@@ -1025,9 +1027,9 @@ async def stream_chat_completion_baseline(
             # missing them and a mid-turn upload could leave a malformed
             # assistant-after-assistant structure on the next turn.
             if drained_at_start:
-                for _pm in drained_at_start:
+                for pm in drained_at_start:
                     transcript_builder.append_user(
-                        content=format_pending_as_user_message(_pm)["content"]
+                        content=format_pending_as_user_message(pm)["content"]
                     )
 
     # Generate title for new sessions
@@ -1050,8 +1052,10 @@ async def stream_chat_completion_baseline(
     graphiti_supplement = get_graphiti_supplement() if graphiti_enabled else ""
     system_prompt = base_system_prompt + get_baseline_supplement() + graphiti_supplement
 
-    # Warm context: pre-load relevant facts from Graphiti on first turn
-    if graphiti_enabled and user_id and len(session.messages) <= 1:
+    # Warm context: pre-load relevant facts from Graphiti on first turn.
+    # Use the pre-drain count so pending messages drained at turn start
+    # don't prevent warm context injection on an actual first turn.
+    if graphiti_enabled and user_id and _pre_drain_msg_count <= 1:
         from backend.copilot.graphiti.context import fetch_warm_context
 
         warm_ctx = await fetch_warm_context(user_id, message or "")


@@ -2308,11 +2308,11 @@ async def stream_chat_completion_sdk(
         pending_texts: list[str] = [
             format_pending_as_user_message(pm)["content"] for pm in pending_at_start
         ]
-        for _pt in pending_texts:
+        for pt in pending_texts:
             # Append directly — pending messages are atomically-popped from
             # Redis and are never stale-cache duplicates, so the
             # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_pt))
+            session.messages.append(ChatMessage(role="user", content=pt))
         if current_message.strip():
             current_message = current_message + "\n\n" + "\n\n".join(pending_texts)
         else:
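The else branch is truncated in this view; the visible branch merges
pending texts into the live message as in this worked example (values
are illustrative):

    # Illustrative values only.
    pending_texts = ["note queued while away", "second queued note"]
    current_message = "live question"
    if current_message.strip():
        current_message = current_message + "\n\n" + "\n\n".join(pending_texts)
    # -> "live question\n\nnote queued while away\n\nsecond queued note"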