Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-04-30 03:00:41 -04:00)
fix(copilot): address review comments on pending-messages PR
- Use _pre_drain_msg_count for transcript load gate (len > 1 check) to avoid spurious transcript load on first turn with pending messages
- Use _pre_drain_msg_count for Graphiti warm context gate to prevent warm context skip when pending messages are drained at first turn
- Add context.url/content length validators to QueuePendingMessageRequest to prevent LLM context-window stuffing (2K url, 32K content caps)
- Rename underscore-prefixed active variables (_pm, _content, _pt) to conventional names (pm, content, pt) per Python convention
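For context, the hunks below read `_pre_drain_msg_count` but do not show where it is captured. A minimal sketch of the intended capture site, assuming a `drain_pending_messages` helper (the helper name is an assumption; only the variable name appears in this diff):

    # Hypothetical capture site, not part of this diff. Record the message
    # count *before* draining so first-turn gates see the true pre-drain state.
    _pre_drain_msg_count = len(session.messages)
    drained_at_start = await drain_pending_messages(session_id)  # assumed helper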
@@ -142,6 +142,29 @@ class QueuePendingMessageRequest(BaseModel):
     )
 
     file_ids: list[str] | None = Field(default=None, max_length=20)
 
+    @field_validator("context")
+    @classmethod
+    def _validate_context_length(
+        cls, v: dict[str, str] | None
+    ) -> dict[str, str] | None:
+        if v is None:
+            return v
+        # Cap context values to prevent LLM context-window stuffing via
+        # large page payloads (url: 2 KB, content: 32 KB).
+        _URL_LIMIT = 2_000
+        _CONTENT_LIMIT = 32_000
+        url = v.get("url", "")
+        if len(url) > _URL_LIMIT:
+            raise ValueError(
+                f"context.url exceeds maximum length of {_URL_LIMIT} characters"
+            )
+        content = v.get("content", "")
+        if len(content) > _CONTENT_LIMIT:
+            raise ValueError(
+                f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters"
+            )
+        return v
+
 
 class QueuePendingMessageResponse(BaseModel):
     """Response for the pending-message endpoint.
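A quick illustration of how the new validator behaves, assuming `context` can be passed on its own when constructing the model (any other required fields are omitted here for brevity):

    from pydantic import ValidationError

    # Hypothetical usage: a 40,000-character page body trips the 32K cap.
    try:
        QueuePendingMessageRequest(
            context={"url": "https://example.com", "content": "x" * 40_000}
        )
    except ValidationError as exc:
        print(exc)  # mentions: context.content exceeds maximum length of 32000 characters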
@@ -950,12 +950,12 @@ async def stream_chat_completion_baseline(
             len(drained_at_start),
             session_id,
         )
-        for _pm in drained_at_start:
-            _content = format_pending_as_user_message(_pm)["content"]
+        for pm in drained_at_start:
+            content = format_pending_as_user_message(pm)["content"]
             # Append directly — pending messages are atomically-popped from
             # Redis and are never stale-cache duplicates, so the
             # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_content))
+            session.messages.append(ChatMessage(role="user", content=content))
 
     session = await upsert_chat_session(session)
 
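The "atomically-popped" comment is what justifies skipping the `maybe_append_user_message` dedup. A minimal sketch of such a drain, assuming pending messages live in a per-session Redis list and the client was created with decode_responses=True (the key name and count cap are assumptions, not from the source):

    import redis.asyncio as redis

    async def drain_pending(r: redis.Redis, session_id: str) -> list[str]:
        # LPOP with a count (Redis >= 6.2) removes and returns entries in one
        # atomic command, so two concurrent turns can never both observe the
        # same queued message; there is nothing left to deduplicate.
        items = await r.lpop(f"copilot:pending:{session_id}", 100)
        return items or []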
@@ -999,8 +999,10 @@ async def stream_chat_completion_baseline(
     prompt_task = _build_cacheable_system_prompt(None)
 
     # Run download + prompt build concurrently — both are independent I/O
-    # on the request critical path.
-    if user_id and len(session.messages) > 1:
+    # on the request critical path. Use the pre-drain count so pending
+    # messages drained at turn start don't spuriously trigger a transcript
+    # load on an actual first turn.
+    if user_id and _pre_drain_msg_count > 1:
         transcript_covers_prefix, (base_system_prompt, understanding) = (
             await asyncio.gather(
                 _load_prior_transcript(
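The effect of the gate change is easiest to see with concrete numbers (illustrative, not from the source): a true first turn where two pending messages were drained at start.

    _pre_drain_msg_count = 1    # captured before the drain: a real first turn
    # After appending the two drained messages, len(session.messages) == 3.
    assert not (_pre_drain_msg_count > 1)   # new gate: no spurious transcript load
    assert 3 > 1                            # old gate would have tried to load a
                                            # transcript that does not exist yet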
@@ -1025,9 +1027,9 @@ async def stream_chat_completion_baseline(
         # missing them and a mid-turn upload could leave a malformed
         # assistant-after-assistant structure on the next turn.
         if drained_at_start:
-            for _pm in drained_at_start:
+            for pm in drained_at_start:
                 transcript_builder.append_user(
-                    content=format_pending_as_user_message(_pm)["content"]
+                    content=format_pending_as_user_message(pm)["content"]
                 )
 
     # Generate title for new sessions
@@ -1050,8 +1052,10 @@ async def stream_chat_completion_baseline(
     graphiti_supplement = get_graphiti_supplement() if graphiti_enabled else ""
     system_prompt = base_system_prompt + get_baseline_supplement() + graphiti_supplement
 
-    # Warm context: pre-load relevant facts from Graphiti on first turn
-    if graphiti_enabled and user_id and len(session.messages) <= 1:
+    # Warm context: pre-load relevant facts from Graphiti on first turn.
+    # Use the pre-drain count so pending messages drained at turn start
+    # don't prevent warm context injection on an actual first turn.
+    if graphiti_enabled and user_id and _pre_drain_msg_count <= 1:
         from backend.copilot.graphiti.context import fetch_warm_context
 
         warm_ctx = await fetch_warm_context(user_id, message or "")
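The same first-turn scenario, applied to this gate (numbers again illustrative):

    _pre_drain_msg_count = 1            # same first turn as above
    assert _pre_drain_msg_count <= 1    # new gate: warm context is still injected
    assert not (3 <= 1)                 # old post-drain check would have skipped it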
@@ -2308,11 +2308,11 @@ async def stream_chat_completion_sdk(
         pending_texts: list[str] = [
             format_pending_as_user_message(pm)["content"] for pm in pending_at_start
         ]
-        for _pt in pending_texts:
+        for pt in pending_texts:
             # Append directly — pending messages are atomically-popped from
             # Redis and are never stale-cache duplicates, so the
             # maybe_append_user_message dedup is wrong here.
-            session.messages.append(ChatMessage(role="user", content=_pt))
+            session.messages.append(ChatMessage(role="user", content=pt))
         if current_message.strip():
             current_message = current_message + "\n\n" + "\n\n".join(pending_texts)
         else:
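In the SDK path the drained texts are also folded into `current_message` itself. An illustrative before/after (values made up):

    pending_texts = ["also check the pricing page", "and summarize both"]
    current_message = "what does this endpoint do?"
    # After the merge above:
    # "what does this endpoint do?\n\nalso check the pricing page\n\nand summarize both"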