From a7d97dacf33ca8b0ec4bf8e6b584ceebcd887d9f Mon Sep 17 00:00:00 2001
From: majdyz
Date: Sat, 11 Apr 2026 00:00:07 +0700
Subject: [PATCH] fix(copilot): address review comments on pending-messages PR

- Use _pre_drain_msg_count for transcript load gate (len > 1 check) to
  avoid spurious transcript load on first turn with pending messages
- Use _pre_drain_msg_count for Graphiti warm context gate to prevent
  warm context skip when pending messages are drained at first turn
- Add context.url/content length validators to QueuePendingMessageRequest
  to prevent LLM context-window stuffing (2K url, 32K content caps)
- Rename underscore-prefixed active variables (_pm, _content, _pt) to
  conventional names (pm, content, pt) per Python convention
---
 .../backend/api/features/chat/routes.py    | 23 +++++++++++++++++++
 .../backend/copilot/baseline/service.py    | 22 ++++++++++--------
 .../backend/backend/copilot/sdk/service.py |  4 ++--
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py
index 6d057b0270..854ca116fc 100644
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -142,6 +142,29 @@ class QueuePendingMessageRequest(BaseModel):
         )
     file_ids: list[str] | None = Field(default=None, max_length=20)
 
+    @field_validator("context")
+    @classmethod
+    def _validate_context_length(
+        cls, v: dict[str, str] | None
+    ) -> dict[str, str] | None:
+        if v is None:
+            return v
+        # Cap context values to prevent LLM context-window stuffing via
+        # large page payloads (url: 2 KB, content: 32 KB).
+        _URL_LIMIT = 2_000
+        _CONTENT_LIMIT = 32_000
+        url = v.get("url", "")
+        if len(url) > _URL_LIMIT:
+            raise ValueError(
+                f"context.url exceeds maximum length of {_URL_LIMIT} characters"
+            )
+        content = v.get("content", "")
+        if len(content) > _CONTENT_LIMIT:
+            raise ValueError(
+                f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters"
+            )
+        return v
+
 
 class QueuePendingMessageResponse(BaseModel):
     """Response for the pending-message endpoint.
diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py
index f46c31ff21..4bcdfd80d9 100644
--- a/autogpt_platform/backend/backend/copilot/baseline/service.py
+++ b/autogpt_platform/backend/backend/copilot/baseline/service.py
@@ -950,12 +950,12 @@ async def stream_chat_completion_baseline(
             len(drained_at_start),
             session_id,
         )
-        for _pm in drained_at_start:
-            _content = format_pending_as_user_message(_pm)["content"]
+        for pm in drained_at_start:
+            content = format_pending_as_user_message(pm)["content"]
             # Append directly — pending messages are atomically-popped from
             # Redis and are never stale-cache duplicates, so the
             # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_content))
+            session.messages.append(ChatMessage(role="user", content=content))
 
     session = await upsert_chat_session(session)
 
@@ -999,8 +999,10 @@
     prompt_task = _build_cacheable_system_prompt(None)
 
     # Run download + prompt build concurrently — both are independent I/O
-    # on the request critical path.
-    if user_id and len(session.messages) > 1:
+    # on the request critical path. Use the pre-drain count so pending
+    # messages drained at turn start don't spuriously trigger a transcript
+    # load on an actual first turn.
+    if user_id and _pre_drain_msg_count > 1:
         transcript_covers_prefix, (base_system_prompt, understanding) = (
             await asyncio.gather(
                 _load_prior_transcript(
@@ -1025,9 +1027,9 @@
             # missing them and a mid-turn upload could leave a malformed
             # assistant-after-assistant structure on the next turn.
             if drained_at_start:
-                for _pm in drained_at_start:
+                for pm in drained_at_start:
                     transcript_builder.append_user(
-                        content=format_pending_as_user_message(_pm)["content"]
+                        content=format_pending_as_user_message(pm)["content"]
                     )
 
     # Generate title for new sessions
@@ -1050,8 +1052,10 @@
     graphiti_supplement = get_graphiti_supplement() if graphiti_enabled else ""
     system_prompt = base_system_prompt + get_baseline_supplement() + graphiti_supplement
 
-    # Warm context: pre-load relevant facts from Graphiti on first turn
-    if graphiti_enabled and user_id and len(session.messages) <= 1:
+    # Warm context: pre-load relevant facts from Graphiti on first turn.
+    # Use the pre-drain count so pending messages drained at turn start
+    # don't prevent warm context injection on an actual first turn.
+    if graphiti_enabled and user_id and _pre_drain_msg_count <= 1:
         from backend.copilot.graphiti.context import fetch_warm_context
 
         warm_ctx = await fetch_warm_context(user_id, message or "")
diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py
index 3384fc82f6..39299ba14b 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -2308,11 +2308,11 @@ async def stream_chat_completion_sdk(
         pending_texts: list[str] = [
            format_pending_as_user_message(pm)["content"] for pm in pending_at_start
         ]
-        for _pt in pending_texts:
+        for pt in pending_texts:
            # Append directly — pending messages are atomically-popped from
            # Redis and are never stale-cache duplicates, so the
            # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_pt))
+            session.messages.append(ChatMessage(role="user", content=pt))
         if current_message.strip():
             current_message = current_message + "\n\n" + "\n\n".join(pending_texts)
         else:
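Reviewer sketch of the new context caps: a minimal, standalone approximation of the
routes.py change above, assuming Pydantic v2 and only the two request fields visible
in the hunk (the real QueuePendingMessageRequest defines more fields and limits);
handy for confirming that oversized page payloads are rejected at request validation
rather than reaching the LLM prompt.

    from pydantic import BaseModel, Field, ValidationError, field_validator


    class PendingMessageSketch(BaseModel):
        # Assumed field shapes, mirroring the patch; not the real model.
        context: dict[str, str] | None = None
        file_ids: list[str] | None = Field(default=None, max_length=20)

        @field_validator("context")
        @classmethod
        def _validate_context_length(
            cls, v: dict[str, str] | None
        ) -> dict[str, str] | None:
            if v is None:
                return v
            if len(v.get("url", "")) > 2_000:
                raise ValueError(
                    "context.url exceeds maximum length of 2000 characters"
                )
            if len(v.get("content", "")) > 32_000:
                raise ValueError(
                    "context.content exceeds maximum length of 32000 characters"
                )
            return v


    # A 40 KB page body now fails validation instead of stuffing the context window.
    try:
        PendingMessageSketch(
            context={"url": "https://example.com", "content": "x" * 40_000}
        )
    except ValidationError as exc:
        print(exc.errors()[0]["msg"])

A dict-level validator (rather than per-key constrained types) keeps the shape of the
context field unchanged for existing clients while still bounding each value.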