From c6a31cb501ca12886fa13d66c926cc43d001b38b Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 15:15:52 +0000 Subject: [PATCH 01/30] feat(copilot): inject user messages mid-turn via pending buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user sends a follow-up message while a copilot turn is still streaming, we now queue it into a per-session Redis buffer and let the executor currently processing the turn drain it between tool-call rounds — the model sees the new message before its next LLM call. Previously such messages were blocked at the RabbitMQ/cluster-lock layer and only processed after the current turn completed. ### New module `backend/copilot/pending_messages.py` - Redis list buffer keyed by ``copilot:pending:{session_id}`` - Pub/sub notify channel as a wake-up hint for future blocking-wait use - Cap of ``MAX_PENDING_MESSAGES=10`` — trims oldest on overflow - 1h TTL matches ``stream_ttl`` default - Helpers: ``push_pending_message``, ``drain_pending_messages``, ``peek_pending_count``, ``clear_pending_messages``, ``format_pending_as_user_message`` ### New endpoint `POST /sessions/{session_id}/messages/pending` - Returns 202 + current buffer length - Persists the message to the DB so it's in the transcript immediately - Sanitises file IDs against the caller's workspace - Does NOT start a new turn (unlike ``stream``) ### Baseline path (simple — in-process injection) `backend/copilot/baseline/service.py` - Between iterations of ``tool_call_loop``, drain pending and append to the shared ``openai_messages`` list so the loop picks them up on the next LLM call - Persist session via ``upsert_chat_session`` after injection - Finally-block safety net clears the buffer on early exit ### SDK path (in-process injection via live client.query) `backend/copilot/sdk/service.py` - When the SDK loop detects ``acc.stream_completed``, before breaking, drain pending and send them via the existing open ``client.query()`` as a new user message; reset ``stream_completed`` to ``False`` and ``continue`` the async-for loop so we keep consuming CLI messages - Combines multiple drained messages into a single ``query()`` call via ``_combine_pending_messages`` to preserve ordering - Finally-block safety net clears the buffer on early exit - This works because the Claude Agent SDK's ``ClaudeSDKClient`` is a long-lived connection: ``query()`` writes a new user message to the CLI's stdin and the same ``receive_response()`` stream picks up the next turn's events, so we keep session continuity without releasing the cluster lock or restarting the subprocess ### Tests `backend/copilot/pending_messages_test.py` - FakeRedis + FakePipeline so tests don't need a live Redis - Covers push/drain, ordering, buffer cap (MAX_PENDING_MESSAGES), clear, publish hook, malformed-payload handling, and the format helper (plain / with context / with file_ids) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/api/features/chat/routes.py | 99 ++++++++ .../backend/copilot/baseline/service.py | 48 ++++ .../backend/copilot/pending_messages.py | 196 +++++++++++++++ .../backend/copilot/pending_messages_test.py | 233 ++++++++++++++++++ .../backend/backend/copilot/sdk/service.py | 69 ++++++ 5 files changed, 645 insertions(+) create mode 100644 autogpt_platform/backend/backend/copilot/pending_messages.py create mode 100644 autogpt_platform/backend/backend/copilot/pending_messages_test.py diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py 
b/autogpt_platform/backend/backend/api/features/chat/routes.py index 57a7b9a204..a1eebdd6e3 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -29,6 +29,11 @@ from backend.copilot.model import ( get_user_sessions, update_session_title, ) +from backend.copilot.pending_messages import ( + MAX_PENDING_MESSAGES, + PendingMessage, + push_pending_message, +) from backend.copilot.rate_limit import ( CoPilotUsageStatus, RateLimitExceeded, @@ -119,6 +124,26 @@ class StreamChatRequest(BaseModel): ) +class QueuePendingMessageRequest(BaseModel): + """Request model for queueing a message into an in-flight turn. + + Unlike ``StreamChatRequest`` this endpoint does **not** start a new + turn — the message is appended to a per-session pending buffer that + the executor currently processing the turn will drain between tool + rounds. + """ + + message: str = Field(min_length=1) + context: dict[str, str] | None = None + file_ids: list[str] | None = Field(default=None, max_length=20) + + +class QueuePendingMessageResponse(BaseModel): + queued: bool + buffer_length: int + message: str + + class CreateSessionRequest(BaseModel): """Request model for creating a new chat session. @@ -1012,6 +1037,80 @@ async def stream_chat_post( ) +@router.post( + "/sessions/{session_id}/messages/pending", + response_model=QueuePendingMessageResponse, + status_code=202, +) +async def queue_pending_message( + session_id: str, + request: QueuePendingMessageRequest, + user_id: str = Security(auth.get_user_id), +): + """Queue a new user message into an in-flight copilot turn. + + When a user sends a follow-up message while a turn is still + streaming, we don't want to block them or start a separate turn — + this endpoint appends the message to a per-session pending buffer + that the executor currently processing the turn will drain between + tool-call rounds, injecting it into the conversation before the + model's next LLM call. + + Returns 202 with the new buffer length on success. If the buffer + is full (``MAX_PENDING_MESSAGES``), the oldest pending message is + evicted to make room for the new one — the newest message always + wins. + + Intended for the frontend "send while streaming" flow. If no turn + is currently in flight the message is still queued — the next turn + the user starts will pick it up before its first LLM call. + """ + await _validate_and_get_session(session_id, user_id) + + # Persist the message to the session immediately so it shows up in + # the transcript even before the executor drains the buffer. + chat_msg = ChatMessage(role="user", content=request.message) + if user_id: + track_user_message( + user_id=user_id, + session_id=session_id, + message_length=len(request.message), + ) + await append_and_save_message(session_id, chat_msg) + + # Sanitise file IDs to the user's own workspace (same logic as + # stream_chat_post) so injection doesn't surface other users' files. 
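+ # Concretely: non-UUID strings are discarded first, then the DB
+ # query keeps only IDs that resolve to a non-deleted file in the
+ # caller's own workspace; everything else is silently dropped.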
+ sanitized_file_ids: list[str] = [] + if request.file_ids and user_id: + valid_ids = [fid for fid in request.file_ids if _UUID_RE.match(fid)] + if valid_ids: + workspace = await get_or_create_workspace(user_id) + files = await UserWorkspaceFile.prisma().find_many( + where={ + "id": {"in": valid_ids}, + "workspaceId": workspace.id, + "isDeleted": False, + } + ) + sanitized_file_ids = [wf.id for wf in files] + + pending = PendingMessage( + content=request.message, + file_ids=sanitized_file_ids, + context=request.context, + ) + buffer_length = await push_pending_message(session_id, pending) + + return QueuePendingMessageResponse( + queued=True, + buffer_length=buffer_length, + message=( + f"Queued — will be injected into the current turn " + f"(buffer: {buffer_length}/{MAX_PENDING_MESSAGES})" + ), + ) + + @router.get( "/sessions/{session_id}/stream", ) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index a8044d80b7..1658d93eb1 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -35,6 +35,11 @@ from backend.copilot.model import ( maybe_append_user_message, upsert_chat_session, ) +from backend.copilot.pending_messages import ( + clear_pending_messages, + drain_pending_messages, + format_pending_as_user_message, +) from backend.copilot.prompting import get_baseline_supplement, get_graphiti_supplement from backend.copilot.response_model import ( StreamBaseResponse, @@ -1160,6 +1165,35 @@ async def stream_chat_completion_baseline( yield evt state.pending_events.clear() + # Inject any messages the user queued while the turn was + # running. ``tool_call_loop`` mutates ``openai_messages`` + # in-place, so appending here means the model sees the new + # messages before its next LLM call. Also persist them to + # the ChatSession so they're part of the durable transcript. + pending = await drain_pending_messages(session_id) + if pending: + for pm in pending: + maybe_append_user_message( + session, pm.content, is_user_message=True + ) + openai_messages.append(format_pending_as_user_message(pm)) + transcript_builder.append_user(content=pm.content) + try: + await upsert_chat_session(session) + except Exception as persist_err: + logger.warning( + "[Baseline] Failed to persist pending messages for " + "session %s: %s", + session_id, + persist_err, + ) + logger.info( + "[Baseline] Injected %d pending message(s) into " + "session %s mid-turn", + len(pending), + session_id, + ) + if loop_result and not loop_result.finished_naturally: limit_msg = ( f"Exceeded {_MAX_TOOL_ROUNDS} tool-call rounds " @@ -1200,6 +1234,20 @@ async def stream_chat_completion_baseline( yield StreamError(errorText=error_msg, code="baseline_error") # Still persist whatever we got finally: + # Safety net — if the stream exited early (error, timeout, etc.) + # we may still have queued pending messages in the buffer. Drop + # them so they don't leak into the next turn. During normal + # completion the tool-call loop drain will already have cleared + # the buffer, so this is a no-op in the happy path. 
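+ # Best-effort cleanup: a failed clear is only logged, since the
+ # worst case is a stale message leaking into the next turn, which
+ # is preferable to masking the error that ended this stream.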
+ try: + await clear_pending_messages(session_id) + except Exception as clear_err: + logger.warning( + "[Baseline] Failed to clear pending messages for %s: %s", + session_id, + clear_err, + ) + # Set cost attributes on OTEL span before closing if _trace_ctx is not None: try: diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py new file mode 100644 index 0000000000..0930a87e2d --- /dev/null +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -0,0 +1,196 @@ +"""Pending-message buffer for in-flight copilot turns. + +When a user sends a new message while a copilot turn is already executing, +instead of blocking the frontend (or queueing a brand-new turn after the +current one finishes), we want the new message to be *injected into the +running turn* — appended between tool-call rounds so the model sees it +before its next LLM call. + +This module provides the cross-process buffer that makes that possible: + +- **Producer** (chat API route): pushes a pending message to Redis and + publishes a notification on a pub/sub channel. +- **Consumer** (executor running the turn): on each tool-call round, + drains the buffer and appends the pending messages to the conversation. + +The Redis list is the durable store; the pub/sub channel is a fast +wake-up hint for long-idle consumers (not used by default, but available +for future blocking-wait semantics). + +A hard cap of ``MAX_PENDING_MESSAGES`` per session prevents abuse. The +buffer is trimmed to the latest ``MAX_PENDING_MESSAGES`` on every push. +""" + +import json +import logging +import time +from typing import Any, cast + +from pydantic import BaseModel, Field + +from backend.data.redis_client import get_redis_async + +logger = logging.getLogger(__name__) + +# Per-session cap. Higher values risk a runaway consumer; lower values +# risk dropping user input under heavy typing. 10 was chosen as a +# reasonable ceiling — a user typing faster than the copilot can drain +# between tool rounds is already an unusual usage pattern. +MAX_PENDING_MESSAGES = 10 + +# Redis key + TTL. The buffer is ephemeral: if a turn completes or the +# executor dies, the pending messages should either have been drained +# already or are safe to drop (the user can resend). +_PENDING_KEY_PREFIX = "copilot:pending:" +_PENDING_CHANNEL_PREFIX = "copilot:pending:notify:" +_PENDING_TTL_SECONDS = 3600 # 1 hour — matches stream_ttl default + + +class PendingMessage(BaseModel): + """A user message queued for injection into an in-flight turn.""" + + content: str = Field(min_length=1) + file_ids: list[str] = Field(default_factory=list) + context: dict[str, str] | None = None + # Unix epoch seconds at enqueue time, for ordering and debugging. + enqueued_at: float = Field(default_factory=time.time) + + +def _buffer_key(session_id: str) -> str: + return f"{_PENDING_KEY_PREFIX}{session_id}" + + +def _notify_channel(session_id: str) -> str: + return f"{_PENDING_CHANNEL_PREFIX}{session_id}" + + +async def push_pending_message( + session_id: str, + message: PendingMessage, +) -> int: + """Append a pending message to the session's buffer. + + Returns the new buffer length. Enforces ``MAX_PENDING_MESSAGES`` by + trimming from the left (oldest) — the newest message always wins if + the user has been typing faster than the copilot can drain. 
+ """ + redis = await get_redis_async() + key = _buffer_key(session_id) + payload = message.model_dump_json() + + # Push + trim + expire in a pipeline so the three writes land atomically + # enough for this use case (pipelining doesn't guarantee atomicity + # across commands but ordering is preserved). + async with redis.pipeline(transaction=False) as pipe: + pipe.rpush(key, payload) + pipe.ltrim(key, -MAX_PENDING_MESSAGES, -1) + pipe.expire(key, _PENDING_TTL_SECONDS) + pipe.llen(key) + results = await pipe.execute() + + new_length = int(results[-1]) + + # Fire-and-forget notify. Subscribers use this as a wake-up hint; + # the buffer itself is authoritative so a lost notify is harmless. + try: + await redis.publish(_notify_channel(session_id), "1") + except Exception as e: # pragma: no cover + logger.warning("pending_messages: publish failed for %s: %s", session_id, e) + + logger.info( + "pending_messages: pushed message to session=%s (buffer_len=%d)", + session_id, + new_length, + ) + return new_length + + +async def drain_pending_messages(session_id: str) -> list[PendingMessage]: + """Atomically pop all pending messages for *session_id*. + + Returns them in enqueue order (oldest first). Uses ``LPOP`` with a + count so the read+delete is a single Redis round trip. If the list + is empty or missing, returns ``[]``. + """ + redis = await get_redis_async() + key = _buffer_key(session_id) + + # Redis LPOP with count (Redis 6.2+) returns None for missing key, + # empty list if we somehow race an empty key, or the popped items. + # redis-py's async lpop overload with a count collapses the return + # type in pyright; cast the awaitable so strict type-check stays + # clean without changing runtime behaviour. + lpop_result = await cast( + "Any", + redis.lpop(key, MAX_PENDING_MESSAGES), + ) + if not lpop_result: + return [] + raw_popped: list[Any] = list(lpop_result) + + # redis-py may return bytes or str depending on decode_responses. + decoded: list[str] = [ + item.decode("utf-8") if isinstance(item, bytes) else str(item) + for item in raw_popped + ] + + messages: list[PendingMessage] = [] + for payload in decoded: + try: + messages.append(PendingMessage(**json.loads(payload))) + except Exception as e: + logger.warning( + "pending_messages: dropping malformed entry for %s: %s", + session_id, + e, + ) + + if messages: + logger.info( + "pending_messages: drained %d messages for session=%s", + len(messages), + session_id, + ) + return messages + + +async def peek_pending_count(session_id: str) -> int: + """Return the current buffer length without consuming it.""" + redis = await get_redis_async() + length = await cast("Any", redis.llen(_buffer_key(session_id))) + return int(length) + + +async def clear_pending_messages(session_id: str) -> None: + """Drop the session's pending buffer. + + Called at the end of a turn (success or failure) so messages from a + previous turn don't leak into the next one. The buffer may already + have been drained inside the turn — this is a safety net. + """ + redis = await get_redis_async() + await redis.delete(_buffer_key(session_id)) + + +def format_pending_as_user_message(message: PendingMessage) -> dict[str, Any]: + """Shape a ``PendingMessage`` into the OpenAI-format user message dict. + + Used by the baseline tool-call loop when injecting the buffered + message into the conversation. Context/file metadata (if any) is + embedded into the content so the model sees everything in one block. 
+ """ + parts: list[str] = [message.content] + if message.context: + url = message.context.get("url") + if url: + parts.append(f"\n\n[Page URL: {url}]") + page_content = message.context.get("content") + if page_content: + parts.append(f"\n\n[Page content]\n{page_content}") + if message.file_ids: + parts.append( + "\n\n[Attached files]\n" + + "\n".join(f"- file_id={fid}" for fid in message.file_ids) + + "\nUse read_workspace_file with the file_id to access file contents." + ) + return {"role": "user", "content": "".join(parts)} diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py new file mode 100644 index 0000000000..b03906f52a --- /dev/null +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -0,0 +1,233 @@ +"""Tests for the copilot pending-messages buffer. + +Uses a fake async Redis client so the tests don't require a real Redis +instance (the backend test suite's DB/Redis fixtures are heavyweight +and pull in the full app startup). +""" + +import json +from typing import Any + +import pytest + +from backend.copilot import pending_messages as pm_module +from backend.copilot.pending_messages import ( + MAX_PENDING_MESSAGES, + PendingMessage, + clear_pending_messages, + drain_pending_messages, + format_pending_as_user_message, + peek_pending_count, + push_pending_message, +) + +# ── Fake Redis ────────────────────────────────────────────────────── + + +class _FakePipeline: + def __init__(self, parent: "_FakeRedis") -> None: + self._parent = parent + self._ops: list[tuple[str, tuple[Any, ...]]] = [] + + async def __aenter__(self) -> "_FakePipeline": + return self + + async def __aexit__(self, *args: object) -> None: + return None + + def rpush(self, key: str, value: Any) -> None: + self._ops.append(("rpush", (key, value))) + + def ltrim(self, key: str, start: int, stop: int) -> None: + self._ops.append(("ltrim", (key, start, stop))) + + def expire(self, key: str, ttl: int) -> None: + self._ops.append(("expire", (key, ttl))) + + def llen(self, key: str) -> None: + self._ops.append(("llen", (key,))) + + async def execute(self) -> list[Any]: + results: list[Any] = [] + for op, args in self._ops: + if op == "rpush": + key, value = args + self._parent.lists.setdefault(key, []).append(value) + results.append(len(self._parent.lists[key])) + elif op == "ltrim": + key, start, stop = args + lst = self._parent.lists.get(key, []) + # Emulate Redis LTRIM (-N, -1) = last N + if start < 0 and stop == -1: + self._parent.lists[key] = lst[start:] + else: + self._parent.lists[key] = lst[start : stop + 1] + results.append(True) + elif op == "expire": + results.append(True) + elif op == "llen": + key = args[0] + results.append(len(self._parent.lists.get(key, []))) + return results + + +class _FakeRedis: + def __init__(self) -> None: + self.lists: dict[str, list[str]] = {} + self.published: list[tuple[str, str]] = [] + + def pipeline(self, transaction: bool = False) -> _FakePipeline: + return _FakePipeline(self) + + async def publish(self, channel: str, payload: str) -> int: + self.published.append((channel, payload)) + return 1 + + async def lpop(self, key: str, count: int) -> list[str] | None: + lst = self.lists.get(key) + if not lst: + return None + popped = lst[:count] + self.lists[key] = lst[count:] + return popped + + async def llen(self, key: str) -> int: + return len(self.lists.get(key, [])) + + async def delete(self, key: str) -> int: + if key in self.lists: + del self.lists[key] + return 
1 + return 0 + + +@pytest.fixture() +def fake_redis(monkeypatch: pytest.MonkeyPatch) -> _FakeRedis: + redis = _FakeRedis() + + async def _get_redis_async() -> _FakeRedis: + return redis + + monkeypatch.setattr(pm_module, "get_redis_async", _get_redis_async) + return redis + + +# ── Basic push / drain ────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_push_and_drain_single_message(fake_redis: _FakeRedis) -> None: + length = await push_pending_message("sess1", PendingMessage(content="hello")) + assert length == 1 + assert await peek_pending_count("sess1") == 1 + + drained = await drain_pending_messages("sess1") + assert len(drained) == 1 + assert drained[0].content == "hello" + assert await peek_pending_count("sess1") == 0 + + +@pytest.mark.asyncio +async def test_push_and_drain_preserves_order(fake_redis: _FakeRedis) -> None: + for i in range(3): + await push_pending_message("sess2", PendingMessage(content=f"msg {i}")) + + drained = await drain_pending_messages("sess2") + assert [m.content for m in drained] == ["msg 0", "msg 1", "msg 2"] + + +@pytest.mark.asyncio +async def test_drain_empty_returns_empty_list(fake_redis: _FakeRedis) -> None: + assert await drain_pending_messages("nope") == [] + + +# ── Buffer cap ────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_cap_drops_oldest_when_exceeded(fake_redis: _FakeRedis) -> None: + # Push MAX_PENDING_MESSAGES + 3 messages + for i in range(MAX_PENDING_MESSAGES + 3): + await push_pending_message("sess3", PendingMessage(content=f"m{i}")) + + # Buffer should be clamped to MAX + assert await peek_pending_count("sess3") == MAX_PENDING_MESSAGES + + drained = await drain_pending_messages("sess3") + assert len(drained) == MAX_PENDING_MESSAGES + # Oldest 3 dropped — we should only see m3..m(MAX+2) + assert drained[0].content == "m3" + assert drained[-1].content == f"m{MAX_PENDING_MESSAGES + 2}" + + +# ── Clear ─────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_clear_removes_buffer(fake_redis: _FakeRedis) -> None: + await push_pending_message("sess4", PendingMessage(content="x")) + await push_pending_message("sess4", PendingMessage(content="y")) + await clear_pending_messages("sess4") + assert await peek_pending_count("sess4") == 0 + + +@pytest.mark.asyncio +async def test_clear_is_idempotent(fake_redis: _FakeRedis) -> None: + # Clearing an already-empty buffer should not raise + await clear_pending_messages("sess_empty") + await clear_pending_messages("sess_empty") + + +# ── Publish hook ──────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_push_publishes_notification(fake_redis: _FakeRedis) -> None: + await push_pending_message("sess5", PendingMessage(content="hi")) + assert ("copilot:pending:notify:sess5", "1") in fake_redis.published + + +# ── Format helper ─────────────────────────────────────────────────── + + +def test_format_pending_plain_text() -> None: + msg = PendingMessage(content="just text") + out = format_pending_as_user_message(msg) + assert out == {"role": "user", "content": "just text"} + + +def test_format_pending_with_context_url() -> None: + msg = PendingMessage( + content="see this page", + context={"url": "https://example.com"}, + ) + out = format_pending_as_user_message(msg) + assert out["role"] == "user" + assert "see this page" in out["content"] + assert "https://example.com" in out["content"] + + +def test_format_pending_with_file_ids() -> None: + msg = 
PendingMessage(content="look here", file_ids=["a", "b"]) + out = format_pending_as_user_message(msg) + assert "file_id=a" in out["content"] + assert "file_id=b" in out["content"] + + +# ── Malformed payload handling ────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_drain_skips_malformed_entries( + fake_redis: _FakeRedis, +) -> None: + # Seed the fake with a mix of valid and malformed payloads + fake_redis.lists["copilot:pending:bad"] = [ + json.dumps({"content": "valid"}), + "{not valid json", + json.dumps({"content": "also valid", "file_ids": ["a"]}), + ] + drained = await drain_pending_messages("bad") + assert len(drained) == 2 + assert drained[0].content == "valid" + assert drained[1].content == "also valid" diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index c2a60a8ba0..feaaabe0ce 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -34,6 +34,11 @@ from opentelemetry import trace as otel_trace from pydantic import BaseModel from backend.copilot.context import get_workspace_manager +from backend.copilot.pending_messages import ( + PendingMessage, + clear_pending_messages, + drain_pending_messages, +) from backend.copilot.permissions import apply_tool_permissions from backend.copilot.rate_limit import get_user_tier from backend.copilot.transcript import ( @@ -213,6 +218,25 @@ def _is_prompt_too_long(err: BaseException) -> bool: return False +def _combine_pending_messages(pending: list[PendingMessage]) -> str: + """Merge drained pending messages into a single user-message body. + + The Claude Agent SDK's ``client.query()`` takes a plain string (or + an async iterable); the simplest way to preserve ordering across + multiple drained messages is to concatenate them with a separator + and send a single ``query()`` call. If there's only one message, + its ``content`` is returned verbatim so the transcript stays clean. + """ + if len(pending) == 1: + return pending[0].content + parts: list[str] = [] + for idx, msg in enumerate(pending, start=1): + header = f"[Additional message {idx}]" if idx > 1 else "" + body = msg.content + parts.append(f"{header}\n{body}".lstrip("\n") if header else body) + return "\n\n".join(parts) + + def _is_sdk_disconnect_error(exc: BaseException) -> bool: """Return True if *exc* is an expected SDK cleanup error from client disconnect. @@ -1784,6 +1808,39 @@ async def _run_stream_attempt( _msgs_since_flush = 0 if acc.stream_completed: + # Before exiting the loop, check if the user queued any + # follow-up messages while this turn was running. If so, + # send them to the same live SDK client as a new query + # and reset the stream completion state so we keep + # consuming CLI messages. This avoids releasing the + # cluster lock and requeueing — the pending messages + # flow directly into the existing conversation. 
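+ # The drain is a single atomic LPOP, so anything pushed after
+ # this point simply stays buffered for the next drain window.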
+ pending = await drain_pending_messages(ctx.session_id) + if pending: + logger.info( + "%s Injecting %d pending message(s) mid-turn", + ctx.log_prefix, + len(pending), + ) + injected_text = _combine_pending_messages(pending) + injected_chat_msg = ChatMessage(role="user", content=injected_text) + ctx.session.messages.append(injected_chat_msg) + state.transcript_builder.append_user(content=injected_text) + try: + await asyncio.shield(upsert_chat_session(ctx.session)) + except Exception as persist_err: + logger.warning( + "%s Failed to persist injected pending message: %s", + ctx.log_prefix, + persist_err, + ) + await client.query(injected_text, session_id=ctx.session_id) + # Reset turn-level state so the next ResultMessage + # ends the injected turn cleanly instead of + # re-completing the previous one. + acc.stream_completed = False + _last_real_msg_time = time.monotonic() + continue break finally: await _safe_close_sdk_client(sdk_client, ctx.log_prefix) @@ -2726,6 +2783,18 @@ async def stream_chat_completion_sdk( raise finally: + # Safety net — drop any pending messages still in the buffer. + # During normal completion the mid-turn drain already cleared + # them; this handles early exits (errors, cancellation, retry). + try: + await clear_pending_messages(session_id) + except Exception as _clear_err: + logger.warning( + "Failed to clear pending messages for %s: %s", + session_id, + _clear_err, + ) + # --- Close OTEL context (with cost attributes) --- if _otel_ctx is not None: try: From cafe49f29580dec1b1ae368beed6f81a4bcada80 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 15:37:40 +0000 Subject: [PATCH 02/30] fix(copilot): address round 1 review on pending-messages feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical fix — the SDK mid-stream injection was structurally broken. ``ClaudeSDKClient.receive_response()`` explicitly returns after the first ``ResultMessage``, so re-issuing ``client.query()`` and setting ``acc.stream_completed = False`` could never restart the iteration — the next ``__anext__`` raised ``StopAsyncIteration`` and the injected turn's response was never consumed. Replaced the broken mid-stream path with a turn-start drain that works for both baseline and SDK. ### Changes **Atomic push via Lua EVAL** (``pending_messages.py``) - Replace the ``RPUSH`` + ``LTRIM`` + ``EXPIRE`` + ``LLEN`` pipeline (which was ``transaction=False`` and racy against concurrent ``LPOP``) with a single Lua script so the push is atomic. - Drop the unused ``enqueued_at`` field. - Add 16k ``max_length`` cap on ``PendingMessage.content``. **Baseline path** (``baseline/service.py``) - Drain at turn start (atomic ``LPOP``): any message queued while the session was idle or between turns is picked up before the first LLM call. - Mid-loop drain now skips the final ``tool_call_loop`` yield (``finished_naturally=True``) — draining there would append a user message the loop is about to exit past, silently losing it. - Inject via ``format_pending_as_user_message`` so file IDs + context are preserved in both ``openai_messages`` and the persisted session transcript (previously the DB copy lost file/context metadata). - Remove the ``finally`` ``clear_pending_messages`` — atomic drain at turn start means any late push belongs to the next turn; clearing here would racily clobber it. **SDK path** (``sdk/service.py``) - Remove the broken mid-stream injection block entirely. 
- Drain at turn start (same atomic ``LPOP``) and merge the drained messages into ``current_message`` before ``_build_query_message``, so the SDK CLI sees them as part of the initial user message. - Remove the ``finally`` ``clear_pending_messages``. - Delete the unused ``_combine_pending_messages`` helper. **Endpoint** (``api/features/chat/routes.py``) - Enforce ``check_rate_limit`` / ``get_global_rate_limits`` — was bypassing per-user daily/weekly token limits that ``/stream`` enforces. - ``QueuePendingMessageRequest`` gets ``extra="forbid"`` and ``message: max_length=16_000``. - Push-first, persist-second: if the Redis push fails we raise 5xx; previously the session DB got an orphan user message with no corresponding queued entry and a retry would duplicate it. - Log a warning when sanitised file IDs drop unknown entries. - Persisted message content now uses ``format_pending_as_user_message`` so the session copy matches what the model actually sees on drain. - Response returns ``buffer_length``, ``max_buffer_length``, and ``turn_in_flight`` so the frontend can show accurate feedback about whether the message will hit the current turn or the next one. **Tests** (``pending_messages_test.py``) - ``_FakeRedis.eval`` emulates the Lua push script so the existing push/drain/cap tests keep working under the new atomic path. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/api/features/chat/routes.py | 101 ++++++++++++++---- .../backend/copilot/baseline/service.py | 61 +++++++---- .../backend/copilot/pending_messages.py | 48 ++++++--- .../backend/copilot/pending_messages_test.py | 19 ++++ .../backend/backend/copilot/sdk/service.py | 92 +++++----------- 5 files changed, 199 insertions(+), 122 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index a1eebdd6e3..b2269b0964 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -32,6 +32,7 @@ from backend.copilot.model import ( from backend.copilot.pending_messages import ( MAX_PENDING_MESSAGES, PendingMessage, + format_pending_as_user_message, push_pending_message, ) from backend.copilot.rate_limit import ( @@ -133,15 +134,28 @@ class QueuePendingMessageRequest(BaseModel): rounds. """ - message: str = Field(min_length=1) - context: dict[str, str] | None = None + model_config = ConfigDict(extra="forbid") + + message: str = Field(min_length=1, max_length=16_000) + context: dict[str, str] | None = Field( + default=None, + description="Optional page context: expected keys are 'url' and 'content'.", + ) file_ids: list[str] | None = Field(default=None, max_length=20) class QueuePendingMessageResponse(BaseModel): + """Response for the pending-message endpoint. + + Clients should rely on ``queued`` / ``buffer_length`` / ``turn_in_flight`` + — the ``detail`` field is human-readable and may change without notice. 
+ """ + queued: bool buffer_length: int - message: str + max_buffer_length: int + turn_in_flight: bool + detail: str class CreateSessionRequest(BaseModel): @@ -1051,32 +1065,44 @@ async def queue_pending_message( When a user sends a follow-up message while a turn is still streaming, we don't want to block them or start a separate turn — - this endpoint appends the message to a per-session pending buffer - that the executor currently processing the turn will drain between - tool-call rounds, injecting it into the conversation before the - model's next LLM call. + this endpoint appends the message to a per-session pending buffer. + The executor currently running the turn (baseline path) drains the + buffer between tool-call rounds and appends the message to the + conversation before the next LLM call. On the SDK path the buffer + is drained at the *start* of the next turn (the long-lived + ``ClaudeSDKClient.receive_response`` iterator returns after a + ``ResultMessage`` so there is no safe point to inject mid-stream + into an existing connection). - Returns 202 with the new buffer length on success. If the buffer - is full (``MAX_PENDING_MESSAGES``), the oldest pending message is - evicted to make room for the new one — the newest message always - wins. - - Intended for the frontend "send while streaming" flow. If no turn - is currently in flight the message is still queued — the next turn - the user starts will pick it up before its first LLM call. + Returns 202. Enforces the same per-user daily/weekly token rate + limit as the regular ``/stream`` endpoint so a client can't bypass + it by batching messages through here. """ await _validate_and_get_session(session_id, user_id) - # Persist the message to the session immediately so it shows up in - # the transcript even before the executor drains the buffer. - chat_msg = ChatMessage(role="user", content=request.message) + # Pre-turn rate-limit check — mirrors stream_chat_post. Without + # this, a client could bypass per-turn token limits by batching + # their extra context through this endpoint while a cheap stream + # is in flight. + if user_id: + try: + daily_limit, weekly_limit, _tier = await get_global_rate_limits( + user_id, config.daily_token_limit, config.weekly_token_limit + ) + await check_rate_limit( + user_id=user_id, + daily_token_limit=daily_limit, + weekly_token_limit=weekly_limit, + ) + except RateLimitExceeded as e: + raise HTTPException(status_code=429, detail=str(e)) from e + if user_id: track_user_message( user_id=user_id, session_id=session_id, message_length=len(request.message), ) - await append_and_save_message(session_id, chat_msg) # Sanitise file IDs to the user's own workspace (same logic as # stream_chat_post) so injection doesn't surface other users' files. @@ -1093,7 +1119,18 @@ async def queue_pending_message( } ) sanitized_file_ids = [wf.id for wf in files] + if len(sanitized_file_ids) != len(valid_ids): + logger.warning( + "queue_pending_message: dropped %d file id(s) not in " + "caller's workspace (session=%s)", + len(valid_ids) - len(sanitized_file_ids), + session_id, + ) + # Push to Redis BEFORE writing to the session DB. If the push + # fails we raise 5xx and the client retries; ``append_and_save_message`` + # would otherwise leave an orphan user message persisted with no + # corresponding queued pending entry, and a retry would duplicate it. 
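+ # The reverse failure (push succeeds, DB write below fails) is the
+ # safer one: the model still sees the queued text on drain and only
+ # the transcript copy is missing, so push-first is the right order.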
pending = PendingMessage( content=request.message, file_ids=sanitized_file_ids, @@ -1101,12 +1138,32 @@ async def queue_pending_message( ) buffer_length = await push_pending_message(session_id, pending) + # Persist the message into the session transcript only after the + # push succeeds. The message content embeds file/context metadata + # via format_pending_as_user_message so the DB copy matches what + # the model will actually see on drain. + chat_msg = ChatMessage( + role="user", + content=format_pending_as_user_message(pending)["content"], + ) + await append_and_save_message(session_id, chat_msg) + + # Check whether a turn is currently running for UX feedback. + active_session = await stream_registry.get_session(session_id) + turn_in_flight = bool(active_session and active_session.status == "running") + return QueuePendingMessageResponse( queued=True, buffer_length=buffer_length, - message=( - f"Queued — will be injected into the current turn " - f"(buffer: {buffer_length}/{MAX_PENDING_MESSAGES})" + max_buffer_length=MAX_PENDING_MESSAGES, + turn_in_flight=turn_in_flight, + detail=( + ( + "Queued — will be injected into the current turn." + if turn_in_flight + else "Queued — will be injected at the start of the next turn." + ) + + f" buffer={buffer_length}/{MAX_PENDING_MESSAGES}" ), ) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 1658d93eb1..bb800c10c7 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -36,7 +36,6 @@ from backend.copilot.model import ( upsert_chat_session, ) from backend.copilot.pending_messages import ( - clear_pending_messages, drain_pending_messages, format_pending_as_user_message, ) @@ -933,6 +932,23 @@ async def stream_chat_completion_baseline( message_length=len(message or ""), ) + # Drain any messages the user queued via POST /messages/pending + # while this session was idle (or during a previous turn whose + # mid-loop drains missed them). Atomic LPOP guarantees that a + # concurrent push lands *after* the drain and stays queued for the + # next turn instead of being lost. Prepended to the session so + # the initial LLM call sees them. + drained_at_start = await drain_pending_messages(session_id) + if drained_at_start: + logger.info( + "[Baseline] Draining %d pending message(s) at turn start " "for session %s", + len(drained_at_start), + session_id, + ) + for _pm in drained_at_start: + _content = format_pending_as_user_message(_pm)["content"] + maybe_append_user_message(session, _content, is_user_message=True) + session = await upsert_chat_session(session) # Select model based on the per-request mode. 'fast' downgrades to @@ -1168,16 +1184,32 @@ async def stream_chat_completion_baseline( # Inject any messages the user queued while the turn was # running. ``tool_call_loop`` mutates ``openai_messages`` # in-place, so appending here means the model sees the new - # messages before its next LLM call. Also persist them to - # the ChatSession so they're part of the durable transcript. + # messages on its next LLM call. + # + # IMPORTANT: skip when the loop has already finished (no + # more LLM calls are coming). Draining here would silently + # lose the message because ``tool_call_loop`` is about to + # return on the next ``async for`` step — the user would + # see a 202 from the pending endpoint but the model would + # never actually read the text. 
Those messages stay in + # the buffer and will be picked up at the start of the + # next turn. + if loop_result is None or loop_result.finished_naturally: + continue pending = await drain_pending_messages(session_id) if pending: for pm in pending: + # ``format_pending_as_user_message`` embeds file + # attachments and context URL/page content into the + # content string so the in-session transcript is + # a faithful copy of what the model actually saw. + formatted = format_pending_as_user_message(pm) + content_for_db = formatted["content"] maybe_append_user_message( - session, pm.content, is_user_message=True + session, content_for_db, is_user_message=True ) - openai_messages.append(format_pending_as_user_message(pm)) - transcript_builder.append_user(content=pm.content) + openai_messages.append(formatted) + transcript_builder.append_user(content=content_for_db) try: await upsert_chat_session(session) except Exception as persist_err: @@ -1234,19 +1266,10 @@ async def stream_chat_completion_baseline( yield StreamError(errorText=error_msg, code="baseline_error") # Still persist whatever we got finally: - # Safety net — if the stream exited early (error, timeout, etc.) - # we may still have queued pending messages in the buffer. Drop - # them so they don't leak into the next turn. During normal - # completion the tool-call loop drain will already have cleared - # the buffer, so this is a no-op in the happy path. - try: - await clear_pending_messages(session_id) - except Exception as clear_err: - logger.warning( - "[Baseline] Failed to clear pending messages for %s: %s", - session_id, - clear_err, - ) + # Pending messages are drained atomically at turn start and + # between tool rounds, so there's nothing to clear in finally. + # Any message pushed after the final drain window stays in the + # buffer and gets picked up at the start of the next turn. # Set cost attributes on OTEL span before closing if _trace_ctx is not None: diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py index 0930a87e2d..ea0ae6bc4c 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -23,7 +23,6 @@ buffer is trimmed to the latest ``MAX_PENDING_MESSAGES`` on every push. import json import logging -import time from typing import Any, cast from pydantic import BaseModel, Field @@ -49,11 +48,9 @@ _PENDING_TTL_SECONDS = 3600 # 1 hour — matches stream_ttl default class PendingMessage(BaseModel): """A user message queued for injection into an in-flight turn.""" - content: str = Field(min_length=1) + content: str = Field(min_length=1, max_length=16_000) file_ids: list[str] = Field(default_factory=list) context: dict[str, str] | None = None - # Unix epoch seconds at enqueue time, for ordering and debugging. - enqueued_at: float = Field(default_factory=time.time) def _buffer_key(session_id: str) -> str: @@ -64,31 +61,50 @@ def _notify_channel(session_id: str) -> str: return f"{_PENDING_CHANNEL_PREFIX}{session_id}" +# Lua script: push-then-trim-then-expire-then-length, atomically. +# Running these four commands via a single EVAL guarantees a concurrent +# LPOP drain lands either entirely before the push (returns 0 from +# our earlier LLEN) or entirely after it (sees the new message) — +# never in the middle of a partial state. 
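+# KEYS[1] = buffer key. ARGV[1] = JSON payload, ARGV[2] = max buffer
+# length, ARGV[3] = TTL in seconds. Returns the post-push list length.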
+_PUSH_LUA = """ +redis.call('RPUSH', KEYS[1], ARGV[1]) +redis.call('LTRIM', KEYS[1], -tonumber(ARGV[2]), -1) +redis.call('EXPIRE', KEYS[1], tonumber(ARGV[3])) +return redis.call('LLEN', KEYS[1]) +""" + + async def push_pending_message( session_id: str, message: PendingMessage, ) -> int: - """Append a pending message to the session's buffer. + """Append a pending message to the session's buffer atomically. Returns the new buffer length. Enforces ``MAX_PENDING_MESSAGES`` by trimming from the left (oldest) — the newest message always wins if the user has been typing faster than the copilot can drain. + + The push + trim + expire + llen are wrapped in a single Lua EVAL so + concurrent LPOP drains from the executor never observe a partial + state. """ redis = await get_redis_async() key = _buffer_key(session_id) payload = message.model_dump_json() - # Push + trim + expire in a pipeline so the three writes land atomically - # enough for this use case (pipelining doesn't guarantee atomicity - # across commands but ordering is preserved). - async with redis.pipeline(transaction=False) as pipe: - pipe.rpush(key, payload) - pipe.ltrim(key, -MAX_PENDING_MESSAGES, -1) - pipe.expire(key, _PENDING_TTL_SECONDS) - pipe.llen(key) - results = await pipe.execute() - - new_length = int(results[-1]) + new_length = int( + await cast( + "Any", + redis.eval( + _PUSH_LUA, + 1, + key, + payload, + str(MAX_PENDING_MESSAGES), + str(_PENDING_TTL_SECONDS), + ), + ) + ) # Fire-and-forget notify. Subscribers use this as a wake-up hint; # the buffer itself is authoritative so a lost notify is harmless. diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py index b03906f52a..7fec16c708 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages_test.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -79,6 +79,25 @@ class _FakeRedis: def pipeline(self, transaction: bool = False) -> _FakePipeline: return _FakePipeline(self) + async def eval(self, script: str, num_keys: int, *args: Any) -> Any: + """Emulate the push Lua script. + + The real Lua script runs atomically in Redis; the fake + implementation just runs the equivalent list operations in + order and returns the final LLEN. That's enough to exercise + the cap + ordering invariants the tests care about. 
+ """ + key = args[0] + payload = args[1] + max_len = int(args[2]) + # ARGV[3] is TTL — fake doesn't enforce expiry + lst = self.lists.setdefault(key, []) + lst.append(payload) + if len(lst) > max_len: + # RPUSH + LTRIM(-N, -1) = keep only last N + self.lists[key] = lst[-max_len:] + return len(self.lists[key]) + async def publish(self, channel: str, payload: str) -> int: self.published.append((channel, payload)) return 1 diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index feaaabe0ce..7d13b24925 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -35,9 +35,8 @@ from pydantic import BaseModel from backend.copilot.context import get_workspace_manager from backend.copilot.pending_messages import ( - PendingMessage, - clear_pending_messages, drain_pending_messages, + format_pending_as_user_message, ) from backend.copilot.permissions import apply_tool_permissions from backend.copilot.rate_limit import get_user_tier @@ -218,25 +217,6 @@ def _is_prompt_too_long(err: BaseException) -> bool: return False -def _combine_pending_messages(pending: list[PendingMessage]) -> str: - """Merge drained pending messages into a single user-message body. - - The Claude Agent SDK's ``client.query()`` takes a plain string (or - an async iterable); the simplest way to preserve ordering across - multiple drained messages is to concatenate them with a separator - and send a single ``query()`` call. If there's only one message, - its ``content`` is returned verbatim so the transcript stays clean. - """ - if len(pending) == 1: - return pending[0].content - parts: list[str] = [] - for idx, msg in enumerate(pending, start=1): - header = f"[Additional message {idx}]" if idx > 1 else "" - body = msg.content - parts.append(f"{header}\n{body}".lstrip("\n") if header else body) - return "\n\n".join(parts) - - def _is_sdk_disconnect_error(exc: BaseException) -> bool: """Return True if *exc* is an expected SDK cleanup error from client disconnect. @@ -1808,39 +1788,6 @@ async def _run_stream_attempt( _msgs_since_flush = 0 if acc.stream_completed: - # Before exiting the loop, check if the user queued any - # follow-up messages while this turn was running. If so, - # send them to the same live SDK client as a new query - # and reset the stream completion state so we keep - # consuming CLI messages. This avoids releasing the - # cluster lock and requeueing — the pending messages - # flow directly into the existing conversation. - pending = await drain_pending_messages(ctx.session_id) - if pending: - logger.info( - "%s Injecting %d pending message(s) mid-turn", - ctx.log_prefix, - len(pending), - ) - injected_text = _combine_pending_messages(pending) - injected_chat_msg = ChatMessage(role="user", content=injected_text) - ctx.session.messages.append(injected_chat_msg) - state.transcript_builder.append_user(content=injected_text) - try: - await asyncio.shield(upsert_chat_session(ctx.session)) - except Exception as persist_err: - logger.warning( - "%s Failed to persist injected pending message: %s", - ctx.log_prefix, - persist_err, - ) - await client.query(injected_text, session_id=ctx.session_id) - # Reset turn-level state so the next ResultMessage - # ends the injected turn cleanly instead of - # re-completing the previous one. 
- acc.stream_completed = False - _last_real_msg_time = time.monotonic() - continue break finally: await _safe_close_sdk_client(sdk_client, ctx.log_prefix) @@ -2328,6 +2275,28 @@ async def stream_chat_completion_sdk( if last_user: current_message = last_user[-1].content or "" + # Drain any messages the user queued via POST /messages/pending + # while the previous turn was running (or since the session was + # idle). Messages are drained ATOMICALLY — one LPOP with count + # removes them all at once, so a concurrent push lands *after* + # the drain and stays queued for the next turn instead of being + # lost between LPOP and clear. File IDs and context are + # preserved via format_pending_as_user_message. + pending_at_start = await drain_pending_messages(session_id) + if pending_at_start: + logger.info( + "%s Draining %d pending message(s) at turn start", + log_prefix, + len(pending_at_start), + ) + pending_texts: list[str] = [ + format_pending_as_user_message(pm)["content"] for pm in pending_at_start + ] + if current_message.strip(): + current_message = current_message + "\n\n" + "\n\n".join(pending_texts) + else: + current_message = "\n\n".join(pending_texts) + if not current_message.strip(): yield StreamError( errorText="Message cannot be empty.", @@ -2783,17 +2752,10 @@ async def stream_chat_completion_sdk( raise finally: - # Safety net — drop any pending messages still in the buffer. - # During normal completion the mid-turn drain already cleared - # them; this handles early exits (errors, cancellation, retry). - try: - await clear_pending_messages(session_id) - except Exception as _clear_err: - logger.warning( - "Failed to clear pending messages for %s: %s", - session_id, - _clear_err, - ) + # Pending messages are drained atomically at the start of each + # turn (see drain_pending_messages call above), so there's + # nothing to clean up here — any message pushed after that + # point belongs to the next turn. # --- Close OTEL context (with cost attributes) --- if _otel_ctx is not None: From f140e731501ef19837c31254d671ce3ffb2195ff Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 15:57:57 +0000 Subject: [PATCH 03/30] fix(copilot): address round 2 review on pending-messages feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical: SDK path was double-injecting. The endpoint persisted the message to ``session.messages`` AND the executor drained it from Redis and concatenated into ``current_message`` — the LLM saw each queued message twice (once via the compacted history / gap context that ``_build_query_message`` pulls from ``session.messages``, once via the new query). Baseline avoided this via ``maybe_append_user_message`` dedup but SDK had no equivalent guard. ### Fix: Redis is the single source of truth - Endpoint no longer persists to ``session.messages``. It only pushes to Redis and returns. - Baseline drain-at-start calls ``maybe_append_user_message`` (dedup is a safety net, not the primary guard). - SDK drain-at-start calls ``maybe_append_user_message`` too, so the durable transcript records the queued messages. The concatenation into ``current_message`` stays so the SDK CLI sees the content in the first user message of the new turn. ### Baseline max-iterations silent-loss — Fixed ``tool_call_loop`` yields ``finished_naturally=False`` when ``iteration == max_iterations`` then returns. 
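The guard added in the baseline diff below treats both cases as final:

    is_final_yield = (
        loop_result.finished_naturally
        or loop_result.iterations >= _MAX_TOOL_ROUNDS
    )
    if is_final_yield:
        continue  # leave queued messages for the next turn
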
Previously the drain only skipped ``finished_naturally=True``, so messages drained on the max-iterations final yield were appended to ``openai_messages`` and silently lost (the loop was already exiting). Now the drain also skips when ``loop_result.iterations >= _MAX_TOOL_ROUNDS``. ### API response cleanup - ``QueuePendingMessageResponse``: dropped ``queued`` (always True) and ``detail`` (human-readable, clients shouldn't parse). Kept ``buffer_length``, ``max_buffer_length``, and ``turn_in_flight``. ### Tests - Removed dead ``_FakePipeline`` class (the code switched to Lua EVAL in round 1 so the pipeline fake was unused). - Added ``test_drain_decodes_bytes_payloads`` so the ``bytes → str`` decode branch in ``drain_pending_messages`` is actually exercised (real redis-py returns bytes when ``decode_responses=False``). - Updated ``_FakeRedis.lists`` type hint to ``list[str | bytes]``. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/api/features/chat/routes.py | 43 ++++------- .../backend/copilot/baseline/service.py | 25 +++++-- .../backend/copilot/pending_messages_test.py | 75 ++++++------------- .../backend/backend/copilot/sdk/service.py | 10 +++ 4 files changed, 65 insertions(+), 88 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index b2269b0964..2e19ea8ca3 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -32,7 +32,6 @@ from backend.copilot.model import ( from backend.copilot.pending_messages import ( MAX_PENDING_MESSAGES, PendingMessage, - format_pending_as_user_message, push_pending_message, ) from backend.copilot.rate_limit import ( @@ -147,15 +146,17 @@ class QueuePendingMessageRequest(BaseModel): class QueuePendingMessageResponse(BaseModel): """Response for the pending-message endpoint. - Clients should rely on ``queued`` / ``buffer_length`` / ``turn_in_flight`` - — the ``detail`` field is human-readable and may change without notice. + - ``buffer_length``: how many messages are now in the session's + pending buffer (after this push) + - ``max_buffer_length``: the per-session cap (server-side constant) + - ``turn_in_flight``: ``True`` if a copilot turn was running when + we checked — purely informational for UX feedback. Even when + ``False`` the message is still queued: the next turn drains it. """ - queued: bool buffer_length: int max_buffer_length: int turn_in_flight: bool - detail: str class CreateSessionRequest(BaseModel): @@ -1127,10 +1128,15 @@ async def queue_pending_message( session_id, ) - # Push to Redis BEFORE writing to the session DB. If the push - # fails we raise 5xx and the client retries; ``append_and_save_message`` - # would otherwise leave an orphan user message persisted with no - # corresponding queued pending entry, and a retry would duplicate it. + # Redis is the single source of truth for pending messages. We do + # NOT persist to ``session.messages`` here — the drain-at-start + # path in the baseline/SDK executor is the sole writer for pending + # content. Persisting both here AND in the drain would cause + # double injection (executor sees the message in ``session.messages`` + # *and* drains it from Redis) unless we also dedupe. The dedup in + # ``maybe_append_user_message`` only checks trailing same-role + # repeats, so relying on it is fragile. Keeping the endpoint + # Redis-only avoids the whole consistency-bug class. 
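+ # Consequence: the queued text won't appear in the session
+ # transcript until a turn drains it, so a client that wants an
+ # optimistic echo must render the message locally from the 202.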
pending = PendingMessage( content=request.message, file_ids=sanitized_file_ids, @@ -1138,33 +1144,14 @@ async def queue_pending_message( ) buffer_length = await push_pending_message(session_id, pending) - # Persist the message into the session transcript only after the - # push succeeds. The message content embeds file/context metadata - # via format_pending_as_user_message so the DB copy matches what - # the model will actually see on drain. - chat_msg = ChatMessage( - role="user", - content=format_pending_as_user_message(pending)["content"], - ) - await append_and_save_message(session_id, chat_msg) - # Check whether a turn is currently running for UX feedback. active_session = await stream_registry.get_session(session_id) turn_in_flight = bool(active_session and active_session.status == "running") return QueuePendingMessageResponse( - queued=True, buffer_length=buffer_length, max_buffer_length=MAX_PENDING_MESSAGES, turn_in_flight=turn_in_flight, - detail=( - ( - "Queued — will be injected into the current turn." - if turn_in_flight - else "Queued — will be injected at the start of the next turn." - ) - + f" buffer={buffer_length}/{MAX_PENDING_MESSAGES}" - ), ) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index bb800c10c7..51454918a9 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -1187,14 +1187,23 @@ async def stream_chat_completion_baseline( # messages on its next LLM call. # # IMPORTANT: skip when the loop has already finished (no - # more LLM calls are coming). Draining here would silently - # lose the message because ``tool_call_loop`` is about to - # return on the next ``async for`` step — the user would - # see a 202 from the pending endpoint but the model would - # never actually read the text. Those messages stay in - # the buffer and will be picked up at the start of the - # next turn. - if loop_result is None or loop_result.finished_naturally: + # more LLM calls are coming). ``tool_call_loop`` yields + # a final ``ToolCallLoopResult`` on both paths: + # - natural finish: ``finished_naturally=True`` + # - hit max_iterations: ``finished_naturally=False`` + # and ``iterations >= max_iterations`` + # In either case the loop is about to return on the next + # ``async for`` step, so draining here would silently + # lose the message (the user sees 202 but the model never + # reads the text). Those messages stay in the buffer and + # get picked up at the start of the next turn. 
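+ # Three skip cases below: no round-boundary result yet
+ # (loop_result is None), natural finish, and the max-iterations
+ # final yield. Only a true mid-loop round boundary drains.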
+ if loop_result is None: + continue + is_final_yield = ( + loop_result.finished_naturally + or loop_result.iterations >= _MAX_TOOL_ROUNDS + ) + if is_final_yield: continue pending = await drain_pending_messages(session_id) if pending: diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py index 7fec16c708..7be4b7e53c 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages_test.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -24,61 +24,14 @@ from backend.copilot.pending_messages import ( # ── Fake Redis ────────────────────────────────────────────────────── -class _FakePipeline: - def __init__(self, parent: "_FakeRedis") -> None: - self._parent = parent - self._ops: list[tuple[str, tuple[Any, ...]]] = [] - - async def __aenter__(self) -> "_FakePipeline": - return self - - async def __aexit__(self, *args: object) -> None: - return None - - def rpush(self, key: str, value: Any) -> None: - self._ops.append(("rpush", (key, value))) - - def ltrim(self, key: str, start: int, stop: int) -> None: - self._ops.append(("ltrim", (key, start, stop))) - - def expire(self, key: str, ttl: int) -> None: - self._ops.append(("expire", (key, ttl))) - - def llen(self, key: str) -> None: - self._ops.append(("llen", (key,))) - - async def execute(self) -> list[Any]: - results: list[Any] = [] - for op, args in self._ops: - if op == "rpush": - key, value = args - self._parent.lists.setdefault(key, []).append(value) - results.append(len(self._parent.lists[key])) - elif op == "ltrim": - key, start, stop = args - lst = self._parent.lists.get(key, []) - # Emulate Redis LTRIM (-N, -1) = last N - if start < 0 and stop == -1: - self._parent.lists[key] = lst[start:] - else: - self._parent.lists[key] = lst[start : stop + 1] - results.append(True) - elif op == "expire": - results.append(True) - elif op == "llen": - key = args[0] - results.append(len(self._parent.lists.get(key, []))) - return results - - class _FakeRedis: def __init__(self) -> None: - self.lists: dict[str, list[str]] = {} + # Values are ``str | bytes`` because real redis-py returns + # bytes when ``decode_responses=False``; the drain path must + # handle both and our tests exercise both. + self.lists: dict[str, list[str | bytes]] = {} self.published: list[tuple[str, str]] = [] - def pipeline(self, transaction: bool = False) -> _FakePipeline: - return _FakePipeline(self) - async def eval(self, script: str, num_keys: int, *args: Any) -> Any: """Emulate the push Lua script. @@ -102,7 +55,7 @@ class _FakeRedis: self.published.append((channel, payload)) return 1 - async def lpop(self, key: str, count: int) -> list[str] | None: + async def lpop(self, key: str, count: int) -> list[str | bytes] | None: lst = self.lists.get(key) if not lst: return None @@ -250,3 +203,21 @@ async def test_drain_skips_malformed_entries( assert len(drained) == 2 assert drained[0].content == "valid" assert drained[1].content == "also valid" + + +@pytest.mark.asyncio +async def test_drain_decodes_bytes_payloads( + fake_redis: _FakeRedis, +) -> None: + """Real redis-py returns ``bytes`` when ``decode_responses=False``. + + Seed the fake with bytes values to exercise the ``decode("utf-8")`` + branch in ``drain_pending_messages`` so a regression there doesn't + slip past CI. 
+ """ + fake_redis.lists["copilot:pending:bytes_sess"] = [ + json.dumps({"content": "from bytes"}).encode("utf-8"), + ] + drained = await drain_pending_messages("bytes_sess") + assert len(drained) == 1 + assert drained[0].content == "from bytes" diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 7d13b24925..a43beca39b 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2282,6 +2282,14 @@ async def stream_chat_completion_sdk( # the drain and stays queued for the next turn instead of being # lost between LPOP and clear. File IDs and context are # preserved via format_pending_as_user_message. + # + # The drained content is concatenated into ``current_message`` + # so the SDK CLI sees it in the new user message, AND appended + # to ``session.messages`` (via ``maybe_append_user_message``, + # which dedupes trailing same-role repeats) so the durable + # transcript records it too. The endpoint deliberately does + # NOT persist to session.messages — Redis is the single source + # of truth until this drain runs. pending_at_start = await drain_pending_messages(session_id) if pending_at_start: logger.info( @@ -2292,6 +2300,8 @@ async def stream_chat_completion_sdk( pending_texts: list[str] = [ format_pending_as_user_message(pm)["content"] for pm in pending_at_start ] + for _pt in pending_texts: + maybe_append_user_message(session, _pt, is_user_message=True) if current_message.strip(): current_message = current_message + "\n\n" + "\n\n".join(pending_texts) else: From 80e580f387d3d78acc47c9ac76b05b53116ed712 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 16:10:34 +0000 Subject: [PATCH 04/30] fix(baseline): mirror drained pending messages into transcript_builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round 3 follow-up: the drain-at-start in ``stream_chat_completion_baseline`` persisted pending messages to ``session.messages`` but never called ``transcript_builder.append_user`` for them. A mid-turn transcript upload would be missing the drained text, which could produce a malformed assistant-after-assistant structure on the next turn. The drain block runs BEFORE ``transcript_builder`` is instantiated (which happens after prompt/transcript async setup), so we can't call append_user in the drain block itself. Instead, we remember the drained list and mirror it into the transcript right after the single-message ``transcript_builder.append_user(content=message)`` call near the prompt-build site. Also cleaned up the stray adjacent-string concatenation in the log line (``"...turn start " "for session %s"`` → single string). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/copilot/baseline/service.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 51454918a9..3858c9694b 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -936,12 +936,11 @@ async def stream_chat_completion_baseline( # while this session was idle (or during a previous turn whose # mid-loop drains missed them). Atomic LPOP guarantees that a # concurrent push lands *after* the drain and stays queued for the - # next turn instead of being lost. 
Prepended to the session so - # the initial LLM call sees them. + # next turn instead of being lost. drained_at_start = await drain_pending_messages(session_id) if drained_at_start: logger.info( - "[Baseline] Draining %d pending message(s) at turn start " "for session %s", + "[Baseline] Draining %d pending message(s) at turn start for session %s", len(drained_at_start), session_id, ) @@ -1009,6 +1008,16 @@ async def stream_chat_completion_baseline( if message and is_user_message: transcript_builder.append_user(content=message) + # Mirror any messages drained at turn start (see above) into the + # transcript — otherwise the loaded prior transcript would be + # missing them and a mid-turn upload could leave a malformed + # assistant-after-assistant structure on the next turn. + if drained_at_start: + for _pm in drained_at_start: + transcript_builder.append_user( + content=format_pending_as_user_message(_pm)["content"] + ) + # Generate title for new sessions if is_user_message and not session.title: user_messages = [m for m in session.messages if m.role == "user"] From 51465fbb0229ed9ac257490caf3d49f60eacd3d8 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 16:15:02 +0000 Subject: [PATCH 05/30] docs(pending_messages): fix two stale comments in pending_messages.py Round 4 review nits: - ``_PUSH_LUA`` block comment mentioned "returns 0 from our earlier LLEN" which was a leftover from an earlier design that had a separate LLEN check. The atomicity guarantee doesn't depend on it. Reworded to describe Redis EVAL serialisation instead. - ``clear_pending_messages`` docstring said "called at the end of a turn" but the finally-block call sites were removed in round 2 when the atomic drain-at-start became the primary consumer. The function is now only an operator/debug escape hatch. Docstring updated to match. No behavioural change. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/backend/copilot/pending_messages.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py index ea0ae6bc4c..4c62cecf25 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -62,10 +62,9 @@ def _notify_channel(session_id: str) -> str: # Lua script: push-then-trim-then-expire-then-length, atomically. -# Running these four commands via a single EVAL guarantees a concurrent -# LPOP drain lands either entirely before the push (returns 0 from -# our earlier LLEN) or entirely after it (sees the new message) — -# never in the middle of a partial state. +# Redis serializes EVAL commands, so a concurrent ``LPOP`` drain +# observes either the pre-push or post-push state of the list — never +# a partial state where the RPUSH has landed but LTRIM hasn't run. _PUSH_LUA = """ redis.call('RPUSH', KEYS[1], ARGV[1]) redis.call('LTRIM', KEYS[1], -tonumber(ARGV[2]), -1) @@ -180,9 +179,11 @@ async def peek_pending_count(session_id: str) -> int: async def clear_pending_messages(session_id: str) -> None: """Drop the session's pending buffer. - Called at the end of a turn (success or failure) so messages from a - previous turn don't leak into the next one. The buffer may already - have been drained inside the turn — this is a safety net. 
+ Not called by the normal turn flow — the atomic ``LPOP`` drain at + turn start is the primary consumer, and any push that arrives + after the drain window belongs to the next turn by definition. + Retained as an operator/debug escape hatch for manually clearing a + stuck session and as a fixture in the unit tests. """ redis = await get_redis_async() await redis.delete(_buffer_key(session_id)) From a4dbcf424782c8afb90c22b8ca04624b91b8359f Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 23:29:44 +0700 Subject: [PATCH 06/30] =?UTF-8?q?fix(backend/copilot):=20address=20round-3?= =?UTF-8?q?=20review=20=E2=80=94=20dedup,=20persist,=20guards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace maybe_append_user_message with direct session.messages.append for pending drain in both baseline mid-loop and SDK drain-at-start: pending messages are atomically popped from Redis and are never stale-cache duplicates, so the dedup is wrong and causes openai_messages/transcript to diverge from the DB record - Add immediate upsert_chat_session after SDK drain-at-start so a crash between drain and finally doesn't lose messages already removed from Redis - Capture _pre_drain_msg_count before the baseline drain-at-start: use it for is_first_turn (prevents pending messages from flipping the flag to False on an actual first turn) and for _load_prior_transcript (prevents the stale-transcript check from firing on every turn that drains pending messages, which would block transcript upload forever) - Remove redundant if user_id: guards in queue_pending_message — user_id is guaranteed non-empty by Security(auth.get_user_id); the guards made the rate-limit check silently optional --- .../backend/api/features/chat/routes.py | 37 +++++++++---------- .../backend/copilot/baseline/service.py | 25 ++++++++++--- .../backend/backend/copilot/sdk/service.py | 26 ++++++++++--- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 2e19ea8ca3..6d057b0270 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -1085,30 +1085,29 @@ async def queue_pending_message( # this, a client could bypass per-turn token limits by batching # their extra context through this endpoint while a cheap stream # is in flight. - if user_id: - try: - daily_limit, weekly_limit, _tier = await get_global_rate_limits( - user_id, config.daily_token_limit, config.weekly_token_limit - ) - await check_rate_limit( - user_id=user_id, - daily_token_limit=daily_limit, - weekly_token_limit=weekly_limit, - ) - except RateLimitExceeded as e: - raise HTTPException(status_code=429, detail=str(e)) from e - - if user_id: - track_user_message( - user_id=user_id, - session_id=session_id, - message_length=len(request.message), + # user_id is guaranteed non-empty by Security(auth.get_user_id) — no guard needed. 
+ try: + daily_limit, weekly_limit, _tier = await get_global_rate_limits( + user_id, config.daily_token_limit, config.weekly_token_limit ) + await check_rate_limit( + user_id=user_id, + daily_token_limit=daily_limit, + weekly_token_limit=weekly_limit, + ) + except RateLimitExceeded as e: + raise HTTPException(status_code=429, detail=str(e)) from e + + track_user_message( + user_id=user_id, + session_id=session_id, + message_length=len(request.message), + ) # Sanitise file IDs to the user's own workspace (same logic as # stream_chat_post) so injection doesn't surface other users' files. sanitized_file_ids: list[str] = [] - if request.file_ids and user_id: + if request.file_ids: valid_ids = [fid for fid in request.file_ids if _UUID_RE.match(fid)] if valid_ids: workspace = await get_or_create_workspace(user_id) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index f9b5a7d9ea..f46c31ff21 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -934,6 +934,10 @@ async def stream_chat_completion_baseline( message_length=len(message or ""), ) + # Capture count *before* the pending drain so is_first_turn and the + # transcript staleness check are not skewed by queued messages. + _pre_drain_msg_count = len(session.messages) + # Drain any messages the user queued via POST /messages/pending # while this session was idle (or during a previous turn whose # mid-loop drains missed them). Atomic LPOP guarantees that a @@ -948,7 +952,10 @@ async def stream_chat_completion_baseline( ) for _pm in drained_at_start: _content = format_pending_as_user_message(_pm)["content"] - maybe_append_user_message(session, _content, is_user_message=True) + # Append directly — pending messages are atomically-popped from + # Redis and are never stale-cache duplicates, so the + # maybe_append_user_message dedup is wrong here. + session.messages.append(ChatMessage(role="user", content=_content)) session = await upsert_chat_session(session) @@ -979,7 +986,9 @@ async def stream_chat_completion_baseline( # Build system prompt only on the first turn to avoid mid-conversation # changes from concurrent chats updating business understanding. - is_first_turn = len(session.messages) <= 1 + # Use the pre-drain count so queued pending messages don't incorrectly + # flip is_first_turn to False on an actual first turn. + is_first_turn = _pre_drain_msg_count <= 1 # Gate context fetch on both first turn AND user message so that assistant- # role calls (e.g. tool-result submissions) on the first turn don't trigger # a needless DB lookup for user understanding. @@ -997,7 +1006,9 @@ async def stream_chat_completion_baseline( _load_prior_transcript( user_id=user_id, session_id=session_id, - session_msg_count=len(session.messages), + # Use pre-drain count so pending messages don't falsely + # mark the stored transcript as stale and prevent upload. + session_msg_count=_pre_drain_msg_count, transcript_builder=transcript_builder, ), prompt_task, @@ -1266,8 +1277,12 @@ async def stream_chat_completion_baseline( # a faithful copy of what the model actually saw. 
formatted = format_pending_as_user_message(pm) content_for_db = formatted["content"] - maybe_append_user_message( - session, content_for_db, is_user_message=True + # Append directly — pending messages are atomically-popped + # from Redis and are never stale-cache duplicates, so the + # maybe_append_user_message dedup is wrong here and would + # cause openai_messages/transcript to diverge from session. + session.messages.append( + ChatMessage(role="user", content=content_for_db) ) openai_messages.append(formatted) transcript_builder.append_user(content=content_for_db) diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 0b3eba5ee0..a818b66d08 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2291,11 +2291,12 @@ async def stream_chat_completion_sdk( # # The drained content is concatenated into ``current_message`` # so the SDK CLI sees it in the new user message, AND appended - # to ``session.messages`` (via ``maybe_append_user_message``, - # which dedupes trailing same-role repeats) so the durable - # transcript records it too. The endpoint deliberately does - # NOT persist to session.messages — Redis is the single source - # of truth until this drain runs. + # directly to ``session.messages`` (no dedup — pending messages are + # atomically-popped from Redis and are never stale-cache duplicates) + # so the durable transcript records it too. Session is persisted + # immediately after the drain so a crash doesn't lose the messages. + # The endpoint deliberately does NOT persist to session.messages — + # Redis is the single source of truth until this drain runs. pending_at_start = await drain_pending_messages(session_id) if pending_at_start: logger.info( @@ -2307,11 +2308,24 @@ async def stream_chat_completion_sdk( format_pending_as_user_message(pm)["content"] for pm in pending_at_start ] for _pt in pending_texts: - maybe_append_user_message(session, _pt, is_user_message=True) + # Append directly — pending messages are atomically-popped from + # Redis and are never stale-cache duplicates, so the + # maybe_append_user_message dedup is wrong here. + session.messages.append(ChatMessage(role="user", content=_pt)) if current_message.strip(): current_message = current_message + "\n\n" + "\n\n".join(pending_texts) else: current_message = "\n\n".join(pending_texts) + # Persist immediately so a crash between here and the finally block + # doesn't lose messages that were already drained from Redis. + try: + session = await upsert_chat_session(session) + except Exception as _persist_err: + logger.warning( + "%s Failed to persist drained pending messages: %s", + log_prefix, + _persist_err, + ) if not current_message.strip(): yield StreamError( From f8f7df7b0a146ec0f3fd54afbdcc68b77ec61ea4 Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 16:34:08 +0000 Subject: [PATCH 07/30] fix(copilot): address CI failures on pending-messages PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. SDK retry tests failing with "Event loop is closed" — the drain-at-start call in stream_chat_completion_sdk was reaching the real ``drain_pending_messages`` (which hits Redis) instead of being mocked. Added a ``drain_pending_messages`` stub returning ``[]`` to the shared ``_make_sdk_patches`` helper so all retry-integration tests skip the drain path. 2. 
API types check failing — the new ``POST /sessions/{id}/messages/pending`` endpoint wasn't reflected in the frontend's ``openapi.json``. Regenerated via ``poetry run export-api-schema --output ../frontend/src/app/api/openapi.json`` and ``pnpm prettier --write``. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../copilot/sdk/retry_scenarios_test.py | 6 ++ .../frontend/src/app/api/openapi.json | 101 ++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py b/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py index fd831214a6..710daf626a 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py @@ -1031,6 +1031,12 @@ def _make_sdk_patches( ), (f"{_SVC}.upload_transcript", dict(new_callable=AsyncMock)), (f"{_SVC}.get_user_tier", dict(new_callable=AsyncMock, return_value=None)), + # Stub pending-message drain so retry tests don't hit Redis. + # Returns an empty list → no mid-turn injection happens. + ( + f"{_SVC}.drain_pending_messages", + dict(new_callable=AsyncMock, return_value=[]), + ), ] diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 446b2eb079..2546df9357 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -1605,6 +1605,56 @@ } } }, + "/api/chat/sessions/{session_id}/messages/pending": { + "post": { + "tags": ["v2", "chat", "chat"], + "summary": "Queue Pending Message", + "description": "Queue a new user message into an in-flight copilot turn.\n\nWhen a user sends a follow-up message while a turn is still\nstreaming, we don't want to block them or start a separate turn —\nthis endpoint appends the message to a per-session pending buffer.\nThe executor currently running the turn (baseline path) drains the\nbuffer between tool-call rounds and appends the message to the\nconversation before the next LLM call. On the SDK path the buffer\nis drained at the *start* of the next turn (the long-lived\n``ClaudeSDKClient.receive_response`` iterator returns after a\n``ResultMessage`` so there is no safe point to inject mid-stream\ninto an existing connection).\n\nReturns 202. 
Enforces the same per-user daily/weekly token rate\nlimit as the regular ``/stream`` endpoint so a client can't bypass\nit by batching messages through here.", + "operationId": "postV2QueuePendingMessage", + "security": [{ "HTTPBearerJWT": [] }], + "parameters": [ + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { "type": "string", "title": "Session Id" } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueuePendingMessageRequest" + } + } + } + }, + "responses": { + "202": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueuePendingMessageResponse" + } + } + } + }, + "401": { + "$ref": "#/components/responses/HTTP401NotAuthenticatedError" + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { "$ref": "#/components/schemas/HTTPValidationError" } + } + } + } + } + } + }, "/api/chat/sessions/{session_id}/stream": { "get": { "tags": ["v2", "chat", "chat"], @@ -12668,6 +12718,57 @@ "required": ["providers", "pagination"], "title": "ProviderResponse" }, + "QueuePendingMessageRequest": { + "properties": { + "message": { + "type": "string", + "maxLength": 16000, + "minLength": 1, + "title": "Message" + }, + "context": { + "anyOf": [ + { + "additionalProperties": { "type": "string" }, + "type": "object" + }, + { "type": "null" } + ], + "title": "Context", + "description": "Optional page context: expected keys are 'url' and 'content'." + }, + "file_ids": { + "anyOf": [ + { + "items": { "type": "string" }, + "type": "array", + "maxItems": 20 + }, + { "type": "null" } + ], + "title": "File Ids" + } + }, + "additionalProperties": false, + "type": "object", + "required": ["message"], + "title": "QueuePendingMessageRequest", + "description": "Request model for queueing a message into an in-flight turn.\n\nUnlike ``StreamChatRequest`` this endpoint does **not** start a new\nturn — the message is appended to a per-session pending buffer that\nthe executor currently processing the turn will drain between tool\nrounds." + }, + "QueuePendingMessageResponse": { + "properties": { + "buffer_length": { "type": "integer", "title": "Buffer Length" }, + "max_buffer_length": { + "type": "integer", + "title": "Max Buffer Length" + }, + "turn_in_flight": { "type": "boolean", "title": "Turn In Flight" } + }, + "type": "object", + "required": ["buffer_length", "max_buffer_length", "turn_in_flight"], + "title": "QueuePendingMessageResponse", + "description": "Response for the pending-message endpoint.\n\n- ``buffer_length``: how many messages are now in the session's\n pending buffer (after this push)\n- ``max_buffer_length``: the per-session cap (server-side constant)\n- ``turn_in_flight``: ``True`` if a copilot turn was running when\n we checked — purely informational for UX feedback. Even when\n ``False`` the message is still queued: the next turn drains it." + }, "RateLimitResetResponse": { "properties": { "success": { "type": "boolean", "title": "Success" }, From 39e89b50a7972ddbae66c9041d3c6e0f118840ff Mon Sep 17 00:00:00 2001 From: majdyz Date: Fri, 10 Apr 2026 16:41:55 +0000 Subject: [PATCH 08/30] fix(copilot): address remaining CI failures on pending-messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 
SDK pyright: the inner ``_fetch_transcript`` closure captured ``session`` which pyright couldn't narrow to non-None (the outer scope casts it, but the narrowing doesn't propagate into the nested async function). Added an explicit ``assert session is not None`` at the top of the closure. 2. Lint: re-formatted ``platform_cost_test.py`` — some pre-existing whitespace drift from an upstream merge was tripping Black on CI. Co-Authored-By: Claude Opus 4.6 (1M context) --- autogpt_platform/backend/backend/copilot/sdk/service.py | 1 + autogpt_platform/backend/backend/data/platform_cost_test.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index a818b66d08..3384fc82f6 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2046,6 +2046,7 @@ async def stream_chat_completion_sdk( async def _fetch_transcript(): """Download transcript for --resume if applicable.""" + assert session is not None # narrowed at line 1898 if not ( config.claude_agent_use_resume and user_id and len(session.messages) > 1 ): diff --git a/autogpt_platform/backend/backend/data/platform_cost_test.py b/autogpt_platform/backend/backend/data/platform_cost_test.py index 758e97d37b..65b1a20099 100644 --- a/autogpt_platform/backend/backend/data/platform_cost_test.py +++ b/autogpt_platform/backend/backend/data/platform_cost_test.py @@ -35,7 +35,6 @@ class TestUsdToMicrodollars: assert usd_to_microdollars(1.0) == 1_000_000 - class TestMaskEmail: def test_typical_email(self): assert _mask_email("user@example.com") == "us***@example.com" From a7d97dacf33ca8b0ec4bf8e6b584ceebcd887d9f Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:00:07 +0700 Subject: [PATCH 09/30] fix(copilot): address review comments on pending-messages PR - Use _pre_drain_msg_count for transcript load gate (len > 1 check) to avoid spurious transcript load on first turn with pending messages - Use _pre_drain_msg_count for Graphiti warm context gate to prevent warm context skip when pending messages are drained at first turn - Add context.url/content length validators to QueuePendingMessageRequest to prevent LLM context-window stuffing (2K url, 32K content caps) - Rename underscore-prefixed active variables (_pm, _content, _pt) to conventional names (pm, content, pt) per Python convention --- .../backend/api/features/chat/routes.py | 23 +++++++++++++++++++ .../backend/copilot/baseline/service.py | 22 ++++++++++-------- .../backend/backend/copilot/sdk/service.py | 4 ++-- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 6d057b0270..854ca116fc 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -142,6 +142,29 @@ class QueuePendingMessageRequest(BaseModel): ) file_ids: list[str] | None = Field(default=None, max_length=20) + @field_validator("context") + @classmethod + def _validate_context_length( + cls, v: dict[str, str] | None + ) -> dict[str, str] | None: + if v is None: + return v + # Cap context values to prevent LLM context-window stuffing via + # large page payloads (url: 2 KB, content: 32 KB). 
+ _URL_LIMIT = 2_000 + _CONTENT_LIMIT = 32_000 + url = v.get("url", "") + if len(url) > _URL_LIMIT: + raise ValueError( + f"context.url exceeds maximum length of {_URL_LIMIT} characters" + ) + content = v.get("content", "") + if len(content) > _CONTENT_LIMIT: + raise ValueError( + f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters" + ) + return v + class QueuePendingMessageResponse(BaseModel): """Response for the pending-message endpoint. diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index f46c31ff21..4bcdfd80d9 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -950,12 +950,12 @@ async def stream_chat_completion_baseline( len(drained_at_start), session_id, ) - for _pm in drained_at_start: - _content = format_pending_as_user_message(_pm)["content"] + for pm in drained_at_start: + content = format_pending_as_user_message(pm)["content"] # Append directly — pending messages are atomically-popped from # Redis and are never stale-cache duplicates, so the # maybe_append_user_message dedup is wrong here. - session.messages.append(ChatMessage(role="user", content=_content)) + session.messages.append(ChatMessage(role="user", content=content)) session = await upsert_chat_session(session) @@ -999,8 +999,10 @@ async def stream_chat_completion_baseline( prompt_task = _build_cacheable_system_prompt(None) # Run download + prompt build concurrently — both are independent I/O - # on the request critical path. - if user_id and len(session.messages) > 1: + # on the request critical path. Use the pre-drain count so pending + # messages drained at turn start don't spuriously trigger a transcript + # load on an actual first turn. + if user_id and _pre_drain_msg_count > 1: transcript_covers_prefix, (base_system_prompt, understanding) = ( await asyncio.gather( _load_prior_transcript( @@ -1025,9 +1027,9 @@ async def stream_chat_completion_baseline( # missing them and a mid-turn upload could leave a malformed # assistant-after-assistant structure on the next turn. if drained_at_start: - for _pm in drained_at_start: + for pm in drained_at_start: transcript_builder.append_user( - content=format_pending_as_user_message(_pm)["content"] + content=format_pending_as_user_message(pm)["content"] ) # Generate title for new sessions @@ -1050,8 +1052,10 @@ async def stream_chat_completion_baseline( graphiti_supplement = get_graphiti_supplement() if graphiti_enabled else "" system_prompt = base_system_prompt + get_baseline_supplement() + graphiti_supplement - # Warm context: pre-load relevant facts from Graphiti on first turn - if graphiti_enabled and user_id and len(session.messages) <= 1: + # Warm context: pre-load relevant facts from Graphiti on first turn. + # Use the pre-drain count so pending messages drained at turn start + # don't prevent warm context injection on an actual first turn. 
+ if graphiti_enabled and user_id and _pre_drain_msg_count <= 1: from backend.copilot.graphiti.context import fetch_warm_context warm_ctx = await fetch_warm_context(user_id, message or "") diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 3384fc82f6..39299ba14b 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2308,11 +2308,11 @@ async def stream_chat_completion_sdk( pending_texts: list[str] = [ format_pending_as_user_message(pm)["content"] for pm in pending_at_start ] - for _pt in pending_texts: + for pt in pending_texts: # Append directly — pending messages are atomically-popped from # Redis and are never stale-cache duplicates, so the # maybe_append_user_message dedup is wrong here. - session.messages.append(ChatMessage(role="user", content=_pt)) + session.messages.append(ChatMessage(role="user", content=pt)) if current_message.strip(): current_message = current_message + "\n\n" + "\n\n".join(pending_texts) else: From 5e8345e5eec37fda46e981724c65d562fd22b131 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:06:24 +0700 Subject: [PATCH 10/30] fix(copilot): fix CodeQL false-positive in pending_messages_test Replace broad `url in content` assertion with exact `[Page URL: url]` substring check so CodeQL does not flag it as Incomplete URL Substring Sanitization. --- .../backend/backend/copilot/pending_messages_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py index 7be4b7e53c..7eef559725 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages_test.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -174,9 +174,11 @@ def test_format_pending_with_context_url() -> None: context={"url": "https://example.com"}, ) out = format_pending_as_user_message(msg) + content = out["content"] assert out["role"] == "user" - assert "see this page" in out["content"] - assert "https://example.com" in out["content"] + assert "see this page" in content + # The URL should appear verbatim in the [Page URL: ...] block. 
+ assert "[Page URL: https://example.com]" in content def test_format_pending_with_file_ids() -> None: From d10d14ae745f2b6bf3b5891536dafe4fbb67dfa2 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:10:20 +0700 Subject: [PATCH 11/30] test(copilot): add coverage for pending-message endpoint and URL test - Add 11 tests for QueuePendingMessageRequest validation and the POST /sessions/{id}/messages/pending endpoint covering: - 202 happy path - 422 on empty/oversized message, context.url > 2KB, context.content > 32KB, >20 file_ids - 404 on unknown session - 429 on rate limit exceeded - file_ids scoped to caller's workspace - Fix CodeQL false-positive: replace broad url-in-content assertion with exact [Page URL: url] substring check in pending_messages_test --- .../backend/api/features/chat/routes_test.py | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes_test.py b/autogpt_platform/backend/backend/api/features/chat/routes_test.py index cd87fe611f..18d499047a 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py @@ -579,3 +579,273 @@ class TestStreamChatRequestModeValidation: req = StreamChatRequest(message="hi") assert req.mode is None + + +# ─── QueuePendingMessageRequest validation ──────────────────────────── + + +class TestQueuePendingMessageRequest: + """Unit tests for QueuePendingMessageRequest field validation.""" + + def test_accepts_valid_message(self) -> None: + from backend.api.features.chat.routes import QueuePendingMessageRequest + + req = QueuePendingMessageRequest(message="hello") + assert req.message == "hello" + + def test_rejects_empty_message(self) -> None: + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError): + QueuePendingMessageRequest(message="") + + def test_rejects_message_over_limit(self) -> None: + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError): + QueuePendingMessageRequest(message="x" * 16_001) + + def test_accepts_valid_context(self) -> None: + from backend.api.features.chat.routes import QueuePendingMessageRequest + + req = QueuePendingMessageRequest( + message="hi", + context={"url": "https://example.com", "content": "page text"}, + ) + assert req.context is not None + assert req.context["url"] == "https://example.com" + + def test_rejects_context_url_over_limit(self) -> None: + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError, match="url"): + QueuePendingMessageRequest( + message="hi", + context={"url": "https://example.com/" + "x" * 2_000}, + ) + + def test_rejects_context_content_over_limit(self) -> None: + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError, match="content"): + QueuePendingMessageRequest( + message="hi", + context={"content": "x" * 32_001}, + ) + + def test_rejects_extra_fields(self) -> None: + """extra='forbid' should reject unknown fields.""" + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError): + QueuePendingMessageRequest(message="hi", unknown_field="bad") # type: ignore[call-arg] + + def 
test_accepts_up_to_20_file_ids(self) -> None: + from backend.api.features.chat.routes import QueuePendingMessageRequest + + req = QueuePendingMessageRequest( + message="hi", + file_ids=[f"00000000-0000-0000-0000-{i:012d}" for i in range(20)], + ) + assert req.file_ids is not None + assert len(req.file_ids) == 20 + + def test_rejects_more_than_20_file_ids(self) -> None: + import pydantic + + from backend.api.features.chat.routes import QueuePendingMessageRequest + + with pytest.raises(pydantic.ValidationError): + QueuePendingMessageRequest( + message="hi", + file_ids=[f"00000000-0000-0000-0000-{i:012d}" for i in range(21)], + ) + + +# ─── queue_pending_message endpoint ────────────────────────────────── + + +def _mock_pending_internals( + mocker: pytest_mock.MockerFixture, *, session_exists: bool = True +): + """Mock all async dependencies for the pending-message endpoint.""" + if session_exists: + mock_session = mocker.MagicMock() + mock_session.id = "sess-1" + mocker.patch( + "backend.api.features.chat.routes._validate_and_get_session", + new_callable=AsyncMock, + return_value=mock_session, + ) + else: + mocker.patch( + "backend.api.features.chat.routes._validate_and_get_session", + side_effect=fastapi.HTTPException( + status_code=404, detail="Session not found." + ), + ) + mocker.patch( + "backend.api.features.chat.routes.get_global_rate_limits", + new_callable=AsyncMock, + return_value=(0, 0, None), + ) + mocker.patch( + "backend.api.features.chat.routes.check_rate_limit", + new_callable=AsyncMock, + return_value=None, + ) + mocker.patch( + "backend.api.features.chat.routes.track_user_message", + return_value=None, + ) + mocker.patch( + "backend.api.features.chat.routes.push_pending_message", + new_callable=AsyncMock, + return_value=1, + ) + mock_registry = mocker.MagicMock() + mock_registry.get_session = mocker.AsyncMock(return_value=None) + mocker.patch( + "backend.api.features.chat.routes.stream_registry", + mock_registry, + ) + + +def test_queue_pending_message_returns_202(mocker: pytest_mock.MockerFixture) -> None: + """Happy path: valid message returns 202 with buffer_length.""" + _mock_pending_internals(mocker) + + response = client.post( + "/sessions/sess-1/messages/pending", + json={"message": "follow-up"}, + ) + + assert response.status_code == 202 + data = response.json() + assert data["buffer_length"] == 1 + assert data["turn_in_flight"] is False + + +def test_queue_pending_message_empty_body_returns_422() -> None: + """Empty message must be rejected by Pydantic before hitting any route logic.""" + response = client.post( + "/sessions/sess-1/messages/pending", + json={"message": ""}, + ) + assert response.status_code == 422 + + +def test_queue_pending_message_missing_message_returns_422() -> None: + """Missing 'message' field returns 422.""" + response = client.post( + "/sessions/sess-1/messages/pending", + json={}, + ) + assert response.status_code == 422 + + +def test_queue_pending_message_session_not_found_returns_404( + mocker: pytest_mock.MockerFixture, +) -> None: + """If the session doesn't exist or belong to the user, returns 404.""" + _mock_pending_internals(mocker, session_exists=False) + + response = client.post( + "/sessions/bad-sess/messages/pending", + json={"message": "hi"}, + ) + assert response.status_code == 404 + + +def test_queue_pending_message_rate_limited_returns_429( + mocker: pytest_mock.MockerFixture, +) -> None: + """When rate limit is exceeded, endpoint returns 429.""" + from backend.copilot.rate_limit import RateLimitExceeded + + 
_mock_pending_internals(mocker) + mocker.patch( + "backend.api.features.chat.routes.check_rate_limit", + side_effect=RateLimitExceeded("daily", datetime.now(UTC) + timedelta(hours=1)), + ) + + response = client.post( + "/sessions/sess-1/messages/pending", + json={"message": "hi"}, + ) + assert response.status_code == 429 + + +def test_queue_pending_message_context_url_too_long_returns_422() -> None: + """context.url over 2 KB is rejected.""" + response = client.post( + "/sessions/sess-1/messages/pending", + json={ + "message": "hi", + "context": {"url": "https://example.com/" + "x" * 2_000}, + }, + ) + assert response.status_code == 422 + + +def test_queue_pending_message_context_content_too_long_returns_422() -> None: + """context.content over 32 KB is rejected.""" + response = client.post( + "/sessions/sess-1/messages/pending", + json={ + "message": "hi", + "context": {"content": "x" * 32_001}, + }, + ) + assert response.status_code == 422 + + +def test_queue_pending_message_too_many_file_ids_returns_422() -> None: + """More than 20 file_ids should be rejected.""" + response = client.post( + "/sessions/sess-1/messages/pending", + json={ + "message": "hi", + "file_ids": [f"00000000-0000-0000-0000-{i:012d}" for i in range(21)], + }, + ) + assert response.status_code == 422 + + +def test_queue_pending_message_file_ids_scoped_to_workspace( + mocker: pytest_mock.MockerFixture, +) -> None: + """File IDs must be sanitized to the user's workspace before push.""" + _mock_pending_internals(mocker) + mocker.patch( + "backend.api.features.chat.routes.get_or_create_workspace", + new_callable=AsyncMock, + return_value=type("W", (), {"id": "ws-1"})(), + ) + mock_prisma = mocker.MagicMock() + mock_prisma.find_many = mocker.AsyncMock(return_value=[]) + mocker.patch( + "prisma.models.UserWorkspaceFile.prisma", + return_value=mock_prisma, + ) + fid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + + client.post( + "/sessions/sess-1/messages/pending", + json={"message": "hi", "file_ids": [fid, "not-a-uuid"]}, + ) + + call_kwargs = mock_prisma.find_many.call_args[1] + assert call_kwargs["where"]["id"]["in"] == [fid] + assert call_kwargs["where"]["workspaceId"] == "ws-1" + assert call_kwargs["where"]["isDeleted"] is False From 3ef24b32345e579b3db2c0cb145b8d30b477347a Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:27:15 +0700 Subject: [PATCH 12/30] refactor(copilot): narrow exception handling and type context field - Replace broad `except Exception` with `except (json.JSONDecodeError, ValidationError, TypeError, ValueError)` in drain_pending_messages so unexpected non-data errors propagate instead of being silently swallowed - Introduce `PendingMessageContext` Pydantic model to replace the raw `dict[str, str]` for the context field, making the url/content contract explicit and enabling typed attribute access instead of .get() calls - Update routes.py to construct PendingMessageContext from the validated request dict before passing to PendingMessage - Update tests to use PendingMessageContext directly Addresses coderabbitai review comments. 
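For reviewers, a minimal sketch of the typed contract after this change — the names are those introduced in the diff below, and the values are illustrative only:

    ctx = PendingMessageContext(url="https://example.com", content="page text")
    pm = PendingMessage(content="summarise this", context=ctx)

    # Drain-side formatting now reads typed attributes instead of .get():
    body = format_pending_as_user_message(pm)["content"]
    assert "[Page URL: https://example.com]" in body
    assert "[Page content]\npage text" in body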
--- .../backend/api/features/chat/routes.py | 3 ++- .../backend/copilot/pending_messages.py | 23 +++++++++++-------- .../backend/copilot/pending_messages_test.py | 3 ++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 854ca116fc..d7ebe04507 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -32,6 +32,7 @@ from backend.copilot.model import ( from backend.copilot.pending_messages import ( MAX_PENDING_MESSAGES, PendingMessage, + PendingMessageContext, push_pending_message, ) from backend.copilot.rate_limit import ( @@ -1162,7 +1163,7 @@ async def queue_pending_message( pending = PendingMessage( content=request.message, file_ids=sanitized_file_ids, - context=request.context, + context=PendingMessageContext(**request.context) if request.context else None, ) buffer_length = await push_pending_message(session_id, pending) diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py index 4c62cecf25..4c749fe9d8 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -25,7 +25,7 @@ import json import logging from typing import Any, cast -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ValidationError from backend.data.redis_client import get_redis_async @@ -45,12 +45,19 @@ _PENDING_CHANNEL_PREFIX = "copilot:pending:notify:" _PENDING_TTL_SECONDS = 3600 # 1 hour — matches stream_ttl default +class PendingMessageContext(BaseModel): + """Structured page context attached to a pending message.""" + + url: str | None = None + content: str | None = None + + class PendingMessage(BaseModel): """A user message queued for injection into an in-flight turn.""" content: str = Field(min_length=1, max_length=16_000) file_ids: list[str] = Field(default_factory=list) - context: dict[str, str] | None = None + context: PendingMessageContext | None = None def _buffer_key(session_id: str) -> str: @@ -153,7 +160,7 @@ async def drain_pending_messages(session_id: str) -> list[PendingMessage]: for payload in decoded: try: messages.append(PendingMessage(**json.loads(payload))) - except Exception as e: + except (json.JSONDecodeError, ValidationError, TypeError, ValueError) as e: logger.warning( "pending_messages: dropping malformed entry for %s: %s", session_id, @@ -198,12 +205,10 @@ def format_pending_as_user_message(message: PendingMessage) -> dict[str, Any]: """ parts: list[str] = [message.content] if message.context: - url = message.context.get("url") - if url: - parts.append(f"\n\n[Page URL: {url}]") - page_content = message.context.get("content") - if page_content: - parts.append(f"\n\n[Page content]\n{page_content}") + if message.context.url: + parts.append(f"\n\n[Page URL: {message.context.url}]") + if message.context.content: + parts.append(f"\n\n[Page content]\n{message.context.content}") if message.file_ids: parts.append( "\n\n[Attached files]\n" diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py index 7eef559725..6d0d445feb 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages_test.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -14,6 +14,7 @@ from 
backend.copilot import pending_messages as pm_module from backend.copilot.pending_messages import ( MAX_PENDING_MESSAGES, PendingMessage, + PendingMessageContext, clear_pending_messages, drain_pending_messages, format_pending_as_user_message, @@ -171,7 +172,7 @@ def test_format_pending_plain_text() -> None: def test_format_pending_with_context_url() -> None: msg = PendingMessage( content="see this page", - context={"url": "https://example.com"}, + context=PendingMessageContext(url="https://example.com"), ) out = format_pending_as_user_message(msg) content = out["content"] From 9da0dd111f3540d62a9da3c60bf0518969da2cac Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:31:03 +0700 Subject: [PATCH 13/30] refactor(copilot): extract shared file-ID sanitization helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract `_resolve_workspace_files(user_id, file_ids)` helper from the duplicated UUID-filter + workspace-DB-lookup logic in both `stream_chat_post` and `queue_pending_message`. Both endpoints now call the single helper; callers map the returned `list[UserWorkspaceFile]` to IDs or file-description strings as before. Also removes the redundant `if user_id:` guard from `stream_chat_post`'s file-ID block — `Security(auth.get_user_id)` guarantees a non-empty string. Addresses autogpt-pr-reviewer "Should Fix: Duplicated file-ID sanitization" and coderabbitai nit on the if user_id guard. --- .../backend/api/features/chat/routes.py | 94 ++++++++++--------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index d7ebe04507..11d9ebf90f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -102,6 +102,29 @@ async def _validate_and_get_session( return session +async def _resolve_workspace_files( + user_id: str, + file_ids: list[str], +) -> list[UserWorkspaceFile]: + """Filter *file_ids* to UUID-valid entries that exist in the caller's workspace. + + Returns the matching ``UserWorkspaceFile`` records (empty list if none pass). + Used by both the stream and pending-message endpoints to prevent callers from + referencing other users' files. + """ + valid_ids = [fid for fid in file_ids if _UUID_RE.match(fid)] + if not valid_ids: + return [] + workspace = await get_or_create_workspace(user_id) + return await UserWorkspaceFile.prisma().find_many( + where={ + "id": {"in": valid_ids}, + "workspaceId": workspace.id, + "isDeleted": False, + } + ) + + router = APIRouter( tags=["chat"], ) @@ -850,33 +873,21 @@ async def stream_chat_post( # Also sanitise file_ids so only validated, workspace-scoped IDs are # forwarded downstream (e.g. to the executor via enqueue_copilot_turn). 
sanitized_file_ids: list[str] | None = None - if request.file_ids and user_id: - # Filter to valid UUIDs only to prevent DB abuse - valid_ids = [fid for fid in request.file_ids if _UUID_RE.match(fid)] - - if valid_ids: - workspace = await get_or_create_workspace(user_id) - # Batch query instead of N+1 - files = await UserWorkspaceFile.prisma().find_many( - where={ - "id": {"in": valid_ids}, - "workspaceId": workspace.id, - "isDeleted": False, - } + if request.file_ids: + files = await _resolve_workspace_files(user_id, request.file_ids) + # Only keep IDs that actually exist in the user's workspace + sanitized_file_ids = [wf.id for wf in files] or None + file_lines: list[str] = [ + f"- {wf.name} ({wf.mimeType}, {round(wf.sizeBytes / 1024, 1)} KB), file_id={wf.id}" + for wf in files + ] + if file_lines: + files_block = ( + "\n\n[Attached files]\n" + + "\n".join(file_lines) + + "\nUse read_workspace_file with the file_id to access file contents." ) - # Only keep IDs that actually exist in the user's workspace - sanitized_file_ids = [wf.id for wf in files] or None - file_lines: list[str] = [ - f"- {wf.name} ({wf.mimeType}, {round(wf.sizeBytes / 1024, 1)} KB), file_id={wf.id}" - for wf in files - ] - if file_lines: - files_block = ( - "\n\n[Attached files]\n" - + "\n".join(file_lines) - + "\nUse read_workspace_file with the file_id to access file contents." - ) - request.message += files_block + request.message += files_block # Atomically append user message to session BEFORE creating task to avoid # race condition where GET_SESSION sees task as "running" but message isn't @@ -1128,28 +1139,21 @@ async def queue_pending_message( message_length=len(request.message), ) - # Sanitise file IDs to the user's own workspace (same logic as - # stream_chat_post) so injection doesn't surface other users' files. + # Sanitise file IDs to the user's own workspace so injection doesn't + # surface other users' files. _resolve_workspace_files handles UUID + # filtering and the workspace-scoped DB lookup. sanitized_file_ids: list[str] = [] if request.file_ids: - valid_ids = [fid for fid in request.file_ids if _UUID_RE.match(fid)] - if valid_ids: - workspace = await get_or_create_workspace(user_id) - files = await UserWorkspaceFile.prisma().find_many( - where={ - "id": {"in": valid_ids}, - "workspaceId": workspace.id, - "isDeleted": False, - } + valid_id_count = sum(1 for fid in request.file_ids if _UUID_RE.match(fid)) + files = await _resolve_workspace_files(user_id, request.file_ids) + sanitized_file_ids = [wf.id for wf in files] + if len(sanitized_file_ids) != valid_id_count: + logger.warning( + "queue_pending_message: dropped %d file id(s) not in " + "caller's workspace (session=%s)", + valid_id_count - len(sanitized_file_ids), + session_id, ) - sanitized_file_ids = [wf.id for wf in files] - if len(sanitized_file_ids) != len(valid_ids): - logger.warning( - "queue_pending_message: dropped %d file id(s) not in " - "caller's workspace (session=%s)", - len(valid_ids) - len(sanitized_file_ids), - session_id, - ) # Redis is the single source of truth for pending messages. We do # NOT persist to ``session.messages`` here — the drain-at-start From 18c75beb7a6b9d35c9dbc1c5ff44e0727c8b5c32 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:33:49 +0700 Subject: [PATCH 14/30] nit(copilot): name pub/sub notify payload constant Replace magic string "1" in redis.publish() with named constant _NOTIFY_PAYLOAD for self-documentation. Addresses autogpt-pr-reviewer nit. 
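For context, a hedged sketch of the kind of subscriber the constant anticipates — no subscriber ships in this patch series, so the shape below is an assumption built from standard redis-py asyncio pub/sub plus the existing ``get_redis_async`` / ``drain_pending_messages`` helpers:

    redis = await get_redis_async()
    pubsub = redis.pubsub()
    await pubsub.subscribe(_notify_channel(session_id))
    async for msg in pubsub.listen():
        if msg["type"] != "message":
            continue  # skip subscribe confirmations
        # The payload (_NOTIFY_PAYLOAD) is deliberately meaningless — treat
        # any message as a wake-up hint and re-read the authoritative buffer.
        if pending := await drain_pending_messages(session_id):
            ...  # inject into the running turn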
--- .../backend/backend/copilot/pending_messages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py index 4c749fe9d8..0875a44046 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -44,6 +44,10 @@ _PENDING_KEY_PREFIX = "copilot:pending:" _PENDING_CHANNEL_PREFIX = "copilot:pending:notify:" _PENDING_TTL_SECONDS = 3600 # 1 hour — matches stream_ttl default +# Payload sent on the pub/sub notify channel. Subscribers treat any +# message as a wake-up hint; the value itself is not meaningful. +_NOTIFY_PAYLOAD = "1" + class PendingMessageContext(BaseModel): """Structured page context attached to a pending message.""" @@ -115,7 +119,7 @@ async def push_pending_message( # Fire-and-forget notify. Subscribers use this as a wake-up hint; # the buffer itself is authoritative so a lost notify is harmless. try: - await redis.publish(_notify_channel(session_id), "1") + await redis.publish(_notify_channel(session_id), _NOTIFY_PAYLOAD) except Exception as e: # pragma: no cover logger.warning("pending_messages: publish failed for %s: %s", session_id, e) From 9bfcdf3f1112d4e5cb0847434c8f9c10b637c973 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:35:27 +0700 Subject: [PATCH 15/30] test(copilot): add combined-fields test for format_pending_as_user_message Verify that content + context (url + content) + file_ids all appear in the formatted output when all fields are present simultaneously. Addresses autogpt-pr-reviewer 'format_pending_as_user_message never tested with all fields simultaneously'. --- .../backend/copilot/pending_messages_test.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/autogpt_platform/backend/backend/copilot/pending_messages_test.py b/autogpt_platform/backend/backend/copilot/pending_messages_test.py index 6d0d445feb..cd3f6b7c43 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages_test.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages_test.py @@ -189,6 +189,26 @@ def test_format_pending_with_file_ids() -> None: assert "file_id=b" in out["content"] +def test_format_pending_with_all_fields() -> None: + """All fields (content + context url/content + file_ids) should all appear.""" + msg = PendingMessage( + content="summarise this", + context=PendingMessageContext( + url="https://example.com/page", + content="headline text", + ), + file_ids=["f1", "f2"], + ) + out = format_pending_as_user_message(msg) + body = out["content"] + assert out["role"] == "user" + assert "summarise this" in body + assert "[Page URL: https://example.com/page]" in body + assert "[Page content]\nheadline text" in body + assert "file_id=f1" in body + assert "file_id=f2" in body + + # ── Malformed payload handling ────────────────────────────────────── From a7d06854e3f58942f522aaab09328872086e2a0c Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 00:42:25 +0700 Subject: [PATCH 16/30] feat(copilot): add per-user call-frequency rate limit to pending endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The token-budget check guards against over-spending but does not prevent rapid-fire pushes from a client with a large budget. 
Add a Redis INCR + EXPIRE fixed-window counter (30 calls per 60-second window per user) to cap call frequency independently of token consumption. Returns HTTP 429 with "Too many pending messages" when exceeded. Fails open (Redis unavailable → allows request). Adds test for the new 429 path. Addresses autogpt-pr-reviewer "Should Fix: per-request rate limit". --- .../backend/api/features/chat/routes.py | 28 +++++++++++++++++ .../backend/api/features/chat/routes_test.py | 30 ++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 11d9ebf90f..3ba03150e0 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -90,6 +90,15 @@ _UUID_RE = re.compile( r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.I ) +# Call-frequency cap for the pending-message endpoint. The token-budget +# check in queue_pending_message guards against overspend, but does not +# prevent rapid-fire pushes from a client with a large budget. This cap +# (per user, per 60-second window) limits the rate a caller can hammer the +# endpoint independently of token consumption. +_PENDING_CALL_LIMIT = 30 # pushes per minute per user +_PENDING_CALL_WINDOW_SECONDS = 60 +_PENDING_CALL_KEY_PREFIX = "copilot:pending:calls:" + async def _validate_and_get_session( session_id: str, @@ -1133,6 +1142,25 @@ async def queue_pending_message( except RateLimitExceeded as e: raise HTTPException(status_code=429, detail=str(e)) from e + # Call-frequency cap: prevent rapid-fire pushes that would bypass the + # token-budget check (which only fires per-turn, not per-push). + # Uses a Redis INCR + EXPIRE sliding counter; fails open if Redis is down.
+ try: + _redis = await get_redis_async() + _call_key = f"{_PENDING_CALL_KEY_PREFIX}{user_id}" + _call_count = await _redis.incr(_call_key) + if _call_count == 1: + await _redis.expire(_call_key, _PENDING_CALL_WINDOW_SECONDS) + if _call_count > _PENDING_CALL_LIMIT: + raise HTTPException( + status_code=429, + detail=f"Too many pending messages: limit is {_PENDING_CALL_LIMIT} per {_PENDING_CALL_WINDOW_SECONDS}s", + ) + except HTTPException: + raise + except Exception: + pass # Redis failure is non-fatal; fail open + track_user_message( user_id=user_id, session_id=session_id, diff --git a/autogpt_platform/backend/backend/api/features/chat/routes_test.py b/autogpt_platform/backend/backend/api/features/chat/routes_test.py index 18d499047a..1254f13302 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py @@ -676,7 +676,10 @@ class TestQueuePendingMessageRequest: def _mock_pending_internals( - mocker: pytest_mock.MockerFixture, *, session_exists: bool = True + mocker: pytest_mock.MockerFixture, + *, + session_exists: bool = True, + call_count: int = 1, ): """Mock all async dependencies for the pending-message endpoint.""" if session_exists: @@ -704,6 +707,15 @@ def _mock_pending_internals( new_callable=AsyncMock, return_value=None, ) + # Mock Redis for per-user call-frequency rate limit + mock_redis = mocker.MagicMock() + mock_redis.incr = mocker.AsyncMock(return_value=call_count) + mock_redis.expire = mocker.AsyncMock(return_value=True) + mocker.patch( + "backend.api.features.chat.routes.get_redis_async", + new_callable=AsyncMock, + return_value=mock_redis, + ) mocker.patch( "backend.api.features.chat.routes.track_user_message", return_value=None, @@ -786,6 +798,22 @@ def test_queue_pending_message_rate_limited_returns_429( assert response.status_code == 429 +def test_queue_pending_message_call_frequency_limit_returns_429( + mocker: pytest_mock.MockerFixture, +) -> None: + """When per-user call frequency limit is exceeded, endpoint returns 429.""" + from backend.api.features.chat.routes import _PENDING_CALL_LIMIT + + _mock_pending_internals(mocker, call_count=_PENDING_CALL_LIMIT + 1) + + response = client.post( + "/sessions/sess-1/messages/pending", + json={"message": "hi"}, + ) + assert response.status_code == 429 + assert "Too many pending messages" in response.json()["detail"] + + def test_queue_pending_message_context_url_too_long_returns_422() -> None: """context.url over 2 KB is rejected.""" response = client.post( From c58176365ffdd3a1f93148d5ce25a61af0e515e5 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 08:01:15 +0700 Subject: [PATCH 17/30] fix(backend/copilot): use atomic Lua EVAL for pending call-frequency counter Replace separate INCR + EXPIRE with a single Lua EVAL so the rate-limit key can never be orphaned without a TTL. If the process died between the two commands the key would persist indefinitely, permanently locking out the user after hitting the 30-push limit. Fixes sentry bug report on routes.py:1153. 
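For the record, the race in miniature; a sketch against any asyncio Redis client (``r``, ``key``, and ``window_seconds`` are illustrative names):

    # Before: two round-trips. A crash between them leaves the key with
    # no TTL, so the window never resets and the user stays locked out.
    count = await r.incr(key)
    if count == 1:  # process dies here -> orphaned key
        await r.expire(key, window_seconds)

    # After: one atomic EVAL, so the key can only ever exist with a TTL.
    count = int(await r.eval(_CALL_INCR_LUA, 1, key, str(window_seconds)))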
--- .../backend/api/features/chat/routes.py | 31 ++++++++++++++++--- .../backend/api/features/chat/routes_test.py | 5 ++- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 3ba03150e0..7d36fa2485 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -4,7 +4,7 @@ import asyncio import logging import re from collections.abc import AsyncGenerator -from typing import Annotated +from typing import Annotated, Any, cast from uuid import uuid4 from autogpt_libs import auth @@ -99,6 +99,18 @@ _PENDING_CALL_LIMIT = 30 # pushes per minute per user _PENDING_CALL_WINDOW_SECONDS = 60 _PENDING_CALL_KEY_PREFIX = "copilot:pending:calls:" +# Lua script for atomic INCR + conditional EXPIRE. +# Using a single EVAL ensures the counter never persists without a TTL — +# a bare INCR followed by a separate EXPIRE can leave the key without +# an expiry if the process crashes between the two commands. +_CALL_INCR_LUA = """ +local count = redis.call('INCR', KEYS[1]) +if count == 1 then + redis.call('EXPIRE', KEYS[1], tonumber(ARGV[1])) +end +return count +""" + async def _validate_and_get_session( session_id: str, @@ -1144,13 +1156,22 @@ async def queue_pending_message( # Call-frequency cap: prevent rapid-fire pushes that would bypass the # token-budget check (which only fires per-turn, not per-push). - # Uses a Redis INCR + EXPIRE sliding counter; fails open if Redis is down. + # Uses an atomic Lua EVAL (INCR + EXPIRE) so the key can never be + # orphaned without a TTL; fails open if Redis is down. try: _redis = await get_redis_async() _call_key = f"{_PENDING_CALL_KEY_PREFIX}{user_id}" - _call_count = await _redis.incr(_call_key) - if _call_count == 1: - await _redis.expire(_call_key, _PENDING_CALL_WINDOW_SECONDS) + _call_count = int( + await cast( + "Any", + _redis.eval( + _CALL_INCR_LUA, + 1, + _call_key, + str(_PENDING_CALL_WINDOW_SECONDS), + ), + ) + ) if _call_count > _PENDING_CALL_LIMIT: raise HTTPException( status_code=429, diff --git a/autogpt_platform/backend/backend/api/features/chat/routes_test.py b/autogpt_platform/backend/backend/api/features/chat/routes_test.py index 1254f13302..1c2af0c5e2 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py @@ -707,10 +707,9 @@ def _mock_pending_internals( new_callable=AsyncMock, return_value=None, ) - # Mock Redis for per-user call-frequency rate limit + # Mock Redis for per-user call-frequency rate limit (atomic Lua EVAL) mock_redis = mocker.MagicMock() - mock_redis.incr = mocker.AsyncMock(return_value=call_count) - mock_redis.expire = mocker.AsyncMock(return_value=True) + mock_redis.eval = mocker.AsyncMock(return_value=call_count) mocker.patch( "backend.api.features.chat.routes.get_redis_async", new_callable=AsyncMock, From 1d05b06e43d497d2b206498c67bc2d9b163ceb94 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 08:25:14 +0700 Subject: [PATCH 18/30] fix(backend/copilot): prevent pending message duplication in stale-transcript gap When use_resume=True and the transcript is stale, _build_query_message computes a gap slice from session.messages[transcript_msg_count:-1]. 
Pending messages drained at turn start are appended to session.messages AND concatenated into current_message, so without the ceiling they appear in both gap_context and current_message. Capture _pre_drain_msg_count before drain_pending_messages() and pass it as session_msg_ceiling to _build_query_message. The gap slice is now bounded at the pre-drain count, preventing pending messages from leaking into the gap. Adds two regression tests in query_builder_test.py. --- .../backend/copilot/sdk/query_builder_test.py | 72 +++++++++++++++++++ .../backend/backend/copilot/sdk/service.py | 31 +++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py index 57f037baba..4042dae590 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py @@ -226,6 +226,78 @@ async def test_build_query_no_resume_multi_message(monkeypatch): assert was_compacted is False # mock returns False +@pytest.mark.asyncio +async def test_build_query_session_msg_ceiling_prevents_pending_duplication(): + """session_msg_ceiling stops pending messages from leaking into the gap. + + Scenario: transcript covers 2 messages, session has 2 historical + 1 current + + 2 pending drained at turn start. Without the ceiling the gap would include + the pending messages AND current_message already has them → duplication. + With session_msg_ceiling=3 (pre-drain count) the gap slice is empty and + only current_message carries the pending content. + """ + # session.messages after drain: [hist1, hist2, current_msg, pending1, pending2] + session = _make_session( + [ + ChatMessage(role="user", content="hist1"), + ChatMessage(role="assistant", content="hist2"), + ChatMessage(role="user", content="current msg with pending1 pending2"), + ChatMessage(role="user", content="pending1"), + ChatMessage(role="user", content="pending2"), + ] + ) + # transcript covers hist1+hist2 (2 messages); pre-drain count was 3 (includes current_msg) + result, was_compacted = await _build_query_message( + "current msg with pending1 pending2", + session, + use_resume=True, + transcript_msg_count=2, + session_id="test-session", + session_msg_ceiling=3, # len(session.messages) before drain + ) + # Gap should be empty (transcript_msg_count == ceiling - 1), so no history prepended + assert result == "current msg with pending1 pending2" + assert was_compacted is False + # Pending messages must NOT appear in gap context + assert "pending1" not in result.split("current msg")[0] + + +@pytest.mark.asyncio +async def test_build_query_session_msg_ceiling_preserves_real_gap(): + """session_msg_ceiling still surfaces a genuine stale-transcript gap. + + Scenario: transcript covers 2 messages, session has 4 historical + 1 current + + 2 pending. Ceiling = 5 (pre-drain). Real gap = messages 2-3 (hist3, hist4). 
+ """ + session = _make_session( + [ + ChatMessage(role="user", content="hist1"), + ChatMessage(role="assistant", content="hist2"), + ChatMessage(role="user", content="hist3"), + ChatMessage(role="assistant", content="hist4"), + ChatMessage(role="user", content="current"), + ChatMessage(role="user", content="pending1"), + ChatMessage(role="user", content="pending2"), + ] + ) + result, was_compacted = await _build_query_message( + "current", + session, + use_resume=True, + transcript_msg_count=2, + session_id="test-session", + session_msg_ceiling=5, # pre-drain: [hist1..hist4, current] + ) + # Gap = session.messages[2:4] = [hist3, hist4] + assert "" in result + assert "hist3" in result + assert "hist4" in result + assert "Now, the user says:\ncurrent" in result + # Pending messages must NOT appear in gap + assert "pending1" not in result + assert "pending2" not in result + + @pytest.mark.asyncio async def test_build_query_no_resume_multi_message_compacted(monkeypatch): """When compression actually compacts, was_compacted should be True.""" diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 39299ba14b..4d53611021 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -959,17 +959,33 @@ async def _build_query_message( use_resume: bool, transcript_msg_count: int, session_id: str, + *, + session_msg_ceiling: int | None = None, ) -> tuple[str, bool]: """Build the query message with appropriate context. + Args: + session_msg_ceiling: If provided, treat ``session.messages`` as if it + only has this many entries when computing the gap slice. Pass + ``len(session.messages)`` captured *before* appending any pending + messages so that mid-turn drains do not skew the gap calculation + and cause pending messages to be duplicated in both the gap context + and ``current_message``. + Returns: Tuple of (query_message, was_compacted). """ msg_count = len(session.messages) + # Use the ceiling if supplied (prevents pending-message duplication when + # messages were appended to session.messages after the drain but before + # this function is called). + effective_count = ( + session_msg_ceiling if session_msg_ceiling is not None else msg_count + ) if use_resume and transcript_msg_count > 0: - if transcript_msg_count < msg_count - 1: - gap = session.messages[transcript_msg_count:-1] + if transcript_msg_count < effective_count - 1: + gap = session.messages[transcript_msg_count : effective_count - 1] compressed, was_compressed = await _compress_messages(gap) gap_context = _format_conversation_context(compressed) if gap_context: @@ -2282,6 +2298,15 @@ async def stream_chat_completion_sdk( if last_user: current_message = last_user[-1].content or "" + # Capture the message count *before* draining so _build_query_message + # can compute the gap slice without including the newly-drained pending + # messages. Pending messages are both appended to session.messages AND + # concatenated into current_message; without the ceiling the gap slice + # would extend into the pending messages and duplicate them in the + # model's input context (gap_context + current_message both containing + # them). + _pre_drain_msg_count = len(session.messages) + # Drain any messages the user queued via POST /messages/pending # while the previous turn was running (or since the session was # idle). 
Messages are drained ATOMICALLY — one LPOP with count @@ -2341,6 +2366,7 @@ async def stream_chat_completion_sdk( use_resume, transcript_msg_count, session_id, + session_msg_ceiling=_pre_drain_msg_count, ) # On the first turn inject user context into the message instead of the # system prompt — the system prompt is now static (same for all users) @@ -2478,6 +2504,7 @@ async def stream_chat_completion_sdk( state.use_resume, state.transcript_msg_count, session_id, + session_msg_ceiling=_pre_drain_msg_count, ) if attachments.hint: state.query_message = f"{state.query_message}\n\n{attachments.hint}" From 6b390d667726d206138a125cd07bd2fd5a0d33f2 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 08:45:54 +0700 Subject: [PATCH 19/30] fix(backend/copilot): apply session_msg_ceiling to no-resume compression fallback The no-resume fallback in _build_query_message used raw msg_count (> 1) to detect multi-message history and session.messages[:-1] for the compression slice. After a turn-start drain appends pending messages, msg_count is inflated and the fallback fires on what should be a fresh first turn, placing the current user message into the history context and delivering a confusing split prompt to the model. Apply session_msg_ceiling to both branches: - elif condition: effective_count > 1 instead of msg_count > 1 - compression slice: session.messages[:effective_count - 1] instead of [:-1] With _pre_drain_msg_count=1 on a first turn with drained pending messages, effective_count=1 so the fallback is correctly skipped and current_message (which already contains both the original and pending text) is returned as-is. Adds regression test covering the spurious-fallback scenario. --- .../backend/copilot/sdk/query_builder_test.py | 33 +++++++++++++++++++ .../backend/backend/copilot/sdk/service.py | 8 +++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py index 4042dae590..4a7bf01823 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py @@ -298,6 +298,39 @@ async def test_build_query_session_msg_ceiling_preserves_real_gap(): assert "pending2" not in result +@pytest.mark.asyncio +async def test_build_query_session_msg_ceiling_suppresses_spurious_no_resume_fallback(): + """session_msg_ceiling prevents the no-resume compression fallback from + firing on the first turn of a session when pending messages inflate msg_count. + + Scenario: fresh session (1 message) + 1 pending message drained at turn start. + Without the ceiling: msg_count=2 > 1 → fallback triggers → pending message + leaked into history → wrong context sent to model. + With session_msg_ceiling=1 (pre-drain count): effective_count=1, 1 > 1 is False + → fallback does not trigger → current_message returned as-is. 
+ """ + # session.messages after drain: [current_msg, pending_msg] + session = _make_session( + [ + ChatMessage(role="user", content="What is 2 plus 2?"), + ChatMessage(role="user", content="What is 7 plus 7?"), # pending + ] + ) + result, was_compacted = await _build_query_message( + "What is 2 plus 2?\n\nWhat is 7 plus 7?", + session, + use_resume=False, + transcript_msg_count=0, + session_id="test-session", + session_msg_ceiling=1, # pre-drain: only 1 message existed + ) + # Should return current_message directly without wrapping in history context + assert result == "What is 2 plus 2?\n\nWhat is 7 plus 7?" + assert was_compacted is False + # Pending question must NOT appear in a spurious history section + assert "" not in result + + @pytest.mark.asyncio async def test_build_query_no_resume_multi_message_compacted(monkeypatch): """When compression actually compacts, was_compacted should be True.""" diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 4d53611021..88c41f4c51 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -1001,12 +1001,14 @@ async def _build_query_message( f"{gap_context}\n\nNow, the user says:\n{current_message}", was_compressed, ) - elif not use_resume and msg_count > 1: + elif not use_resume and effective_count > 1: logger.warning( f"[SDK] Using compression fallback for session " - f"{session_id} ({msg_count} messages) — no transcript for --resume" + f"{session_id} ({effective_count} messages) — no transcript for --resume" + ) + compressed, was_compressed = await _compress_messages( + session.messages[: effective_count - 1] ) - compressed, was_compressed = await _compress_messages(session.messages[:-1]) history_context = _format_conversation_context(compressed) if history_context: return ( From d49ffac0a1e0920e911bd392bb1cc73a5ea0f6c6 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 14:55:46 +0000 Subject: [PATCH 20/30] fix(backend/copilot): flush buffered rounds before mid-loop pending drain and wrap turn-start persist Address three review comments on the pending-message PR: 1. (Blocker) Mid-loop pending drain now flushes state.session_messages into session.messages before appending the pending user message, so assistant+tool entries from completed rounds land in chronological order. Without this, the next turn's replay could hit OpenAI tool-call ordering errors (user message interposed between assistant tool_call and its tool result). 2. (Should-Fix) Turn-start upsert_chat_session wrapped in try/except so a transient DB failure doesn't silently lose messages already popped from Redis. Matches the pattern used in mid-loop and SDK drain paths. 3. (Nice-to-Have) Added TestMidLoopPendingFlushOrdering regression test in service_unit_test.py that replays the production flush sequence and asserts chronological ordering of assistant/tool/pending entries. 
--- .../backend/copilot/baseline/service.py | 29 ++++- .../copilot/baseline/service_unit_test.py | 121 ++++++++++++++++++ 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 4bcdfd80d9..e5de490984 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -957,7 +957,19 @@ async def stream_chat_completion_baseline( # maybe_append_user_message dedup is wrong here. session.messages.append(ChatMessage(role="user", content=content)) - session = await upsert_chat_session(session) + # Persist the drained pending messages (if any) plus the current user + # message. Wrap in try/except so a transient DB failure here does not + # silently discard messages that were already popped from Redis — the + # turn can still proceed using the in-memory session.messages, and a + # later resume/replay will backfill from the DB on the next turn. + try: + session = await upsert_chat_session(session) + except Exception as _persist_err: + logger.warning( + "[Baseline] Failed to persist session at turn start " + "(pending drain may not be durable): %s", + _persist_err, + ) # Select model based on the per-request mode. 'fast' downgrades to # the cheaper/faster model; everything else keeps the default. @@ -1274,6 +1286,21 @@ async def stream_chat_completion_baseline( continue pending = await drain_pending_messages(session_id) if pending: + # Flush any buffered assistant/tool messages from completed + # rounds into session.messages BEFORE appending the pending + # user message. ``_baseline_conversation_updater`` only + # records assistant+tool rounds into ``state.session_messages`` + # — they are normally batch-flushed in the finally block. + # Without this in-order flush, the mid-loop pending user + # message lands before the preceding round's assistant/tool + # entries, producing chronologically-wrong session.messages + # on persist (user interposed between an assistant tool_call + # and its tool-result), which breaks OpenAI tool-call ordering + # invariants on the next turn's replay. + for _buffered in state.session_messages: + session.messages.append(_buffered) + state.session_messages.clear() + for pm in pending: # ``format_pending_as_user_message`` embeds file # attachments and context URL/page content into the diff --git a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py index ba1374b720..057530732e 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py @@ -828,3 +828,124 @@ class TestBaselineCostExtraction: # response was never assigned so cost extraction must not raise assert state.cost_usd is None + + +class TestMidLoopPendingFlushOrdering: + """Regression test for the mid-loop pending drain ordering invariant. + + ``_baseline_conversation_updater`` records assistant+tool entries from + each tool-call round into ``state.session_messages``; the finally block + of ``stream_chat_completion_baseline`` batch-flushes them into + ``session.messages`` at the end of the turn. + + The mid-loop pending drain appends pending user messages directly to + ``session.messages``. 
Without flushing ``state.session_messages`` first, + the pending user message lands BEFORE the preceding round's assistant+ + tool entries in the final persisted ``session.messages`` — which + produces a malformed tool-call/tool-result ordering on the next turn's + replay. + + This test documents the invariant by replaying the production flush + sequence against an in-memory state. + """ + + def test_flush_then_append_preserves_chronological_order(self): + """Mid-loop drain must flush state.session_messages before appending + the pending user message, so the final order matches the + chronological execution order. + """ + # Initial state: user turn already appended by maybe_append_user_message + session_messages: list[ChatMessage] = [ + ChatMessage(role="user", content="original user turn"), + ] + state = _BaselineStreamState() + + # Round 1 completes: conversation_updater buffers assistant+tool + # entries into state.session_messages (but does NOT write to + # session.messages yet). + builder = TranscriptBuilder() + builder.append_user("original user turn") + response = LLMLoopResponse( + response_text="calling search", + tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")], + raw_response=None, + prompt_tokens=0, + completion_tokens=0, + ) + tool_results = [ + ToolCallResult( + tool_call_id="tc_1", tool_name="search", content="search output" + ), + ] + openai_messages: list = [] + _baseline_conversation_updater( + openai_messages, + response, + tool_results=tool_results, + transcript_builder=builder, + state=state, + model="test-model", + ) + # state.session_messages should now hold the round-1 assistant + tool + assert len(state.session_messages) == 2 + assert state.session_messages[0].role == "assistant" + assert state.session_messages[1].role == "tool" + + # --- Mid-loop pending drain (production code pattern) --- + # Flush first, THEN append pending. This is the ordering fix. + for _buffered in state.session_messages: + session_messages.append(_buffered) + state.session_messages.clear() + session_messages.append( + ChatMessage(role="user", content="pending mid-loop message") + ) + + # Round 2 completes: new assistant+tool entries buffer again. + response2 = LLMLoopResponse( + response_text="another call", + tool_calls=[LLMToolCall(id="tc_2", name="calc", arguments="{}")], + raw_response=None, + prompt_tokens=0, + completion_tokens=0, + ) + tool_results2 = [ + ToolCallResult( + tool_call_id="tc_2", tool_name="calc", content="calc output" + ), + ] + _baseline_conversation_updater( + openai_messages, + response2, + tool_results=tool_results2, + transcript_builder=builder, + state=state, + model="test-model", + ) + + # --- Finally-block flush (end of turn) --- + for msg in state.session_messages: + session_messages.append(msg) + + # Assert chronological order: original user, round-1 assistant, + # round-1 tool, pending user, round-2 assistant, round-2 tool. + assert [m.role for m in session_messages] == [ + "user", + "assistant", + "tool", + "user", + "assistant", + "tool", + ] + assert session_messages[0].content == "original user turn" + assert session_messages[3].content == "pending mid-loop message" + # The assistant message carrying tool_call tc_1 must be immediately + # followed by its tool result — no user message interposed. 
+ assert session_messages[1].role == "assistant" + assert session_messages[1].tool_calls is not None + assert session_messages[1].tool_calls[0]["id"] == "tc_1" + assert session_messages[2].role == "tool" + assert session_messages[2].tool_call_id == "tc_1" + # Same invariant for the round after the pending user. + assert session_messages[4].tool_calls is not None + assert session_messages[4].tool_calls[0]["id"] == "tc_2" + assert session_messages[5].tool_call_id == "tc_2" From c70e34c30eaf7512c26d35d1dc009bc564d1fc3f Mon Sep 17 00:00:00 2001 From: majdyz Date: Sat, 11 Apr 2026 15:00:25 +0000 Subject: [PATCH 21/30] fix(backend/copilot): prevent duplicate assistant text after mid-loop pending drain Track _flushed_assistant_text_len on _BaselineStreamState so the finally block only appends assistant text produced AFTER the last mid-loop flush. Without this, state.assistant_text (all rounds) vs state.session_messages (post-flush only) desync caused the startswith(recorded) dedup to fail, duplicating round-1 assistant text in session.messages. Adds regression test in service_unit_test.py. --- .../backend/copilot/baseline/service.py | 15 +++- .../copilot/baseline/service_unit_test.py | 80 +++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index e5de490984..9a32b6fc65 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -345,6 +345,11 @@ class _BaselineStreamState: cost_usd: float | None = None thinking_stripper: _ThinkingStripper = field(default_factory=_ThinkingStripper) session_messages: list[ChatMessage] = field(default_factory=list) + # Tracks how much of ``assistant_text`` has already been flushed to + # ``session.messages`` via mid-loop pending drains, so the ``finally`` + # block only appends the *new* assistant text (avoiding duplication of + # round-1 text when round-1 entries were cleared from session_messages). + _flushed_assistant_text_len: int = 0 async def _baseline_llm_caller( @@ -1300,6 +1305,10 @@ async def stream_chat_completion_baseline( for _buffered in state.session_messages: session.messages.append(_buffered) state.session_messages.clear() + # Record how much assistant_text has been covered by the + # structured entries just flushed, so the finally block's + # final-text dedup doesn't re-append rounds already persisted. + state._flushed_assistant_text_len = len(state.assistant_text) for pm in pending: # ``format_pending_as_user_message`` embeds file @@ -1447,7 +1456,11 @@ async def stream_chat_completion_baseline( # no tool calls, i.e. the natural finish). Only add it if the # conversation updater didn't already record it as part of a # tool-call round (which would have empty response_text). - final_text = state.assistant_text + # Only consider assistant text produced AFTER the last mid-loop + # flush. ``_flushed_assistant_text_len`` tracks the prefix already + # persisted via structured session_messages during mid-loop pending + # drains; including it here would duplicate those rounds. 
+ final_text = state.assistant_text[state._flushed_assistant_text_len :] if state.session_messages: # Strip text already captured in tool-call round messages recorded = "".join( diff --git a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py index 057530732e..b67793076f 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service_unit_test.py @@ -949,3 +949,83 @@ class TestMidLoopPendingFlushOrdering: assert session_messages[4].tool_calls is not None assert session_messages[4].tool_calls[0]["id"] == "tc_2" assert session_messages[5].tool_call_id == "tc_2" + + def test_flushed_assistant_text_len_prevents_duplicate_final_text(self): + """After mid-loop drain clears state.session_messages, the finally + block must not re-append assistant text from rounds already flushed. + + ``state.assistant_text`` accumulates ALL rounds' text, but + ``state.session_messages`` only holds entries from rounds AFTER the + last mid-loop flush. Without ``_flushed_assistant_text_len``, the + ``finally`` block's ``startswith(recorded)`` check fails because + ``recorded`` only covers post-flush rounds, and the full + ``assistant_text`` is appended — duplicating pre-flush rounds. + """ + state = _BaselineStreamState() + session_messages: list[ChatMessage] = [ + ChatMessage(role="user", content="user turn"), + ] + + # Simulate round 1 text accumulation (as _bound_llm_caller does) + state.assistant_text += "calling search" + + # Round 1 conversation_updater buffers structured entries + builder = TranscriptBuilder() + builder.append_user("user turn") + response1 = LLMLoopResponse( + response_text="calling search", + tool_calls=[LLMToolCall(id="tc_1", name="search", arguments="{}")], + raw_response=None, + prompt_tokens=0, + completion_tokens=0, + ) + _baseline_conversation_updater( + [], + response1, + tool_results=[ + ToolCallResult( + tool_call_id="tc_1", tool_name="search", content="result" + ) + ], + transcript_builder=builder, + state=state, + model="test-model", + ) + + # Mid-loop drain: flush + clear + record flushed text length + for _buffered in state.session_messages: + session_messages.append(_buffered) + state.session_messages.clear() + state._flushed_assistant_text_len = len(state.assistant_text) + session_messages.append(ChatMessage(role="user", content="pending message")) + + # Simulate round 2 text accumulation + state.assistant_text += "final answer" + + # Round 2: natural finish (no tool calls → no session_messages entry) + + # --- Finally block logic (production code) --- + for msg in state.session_messages: + session_messages.append(msg) + + final_text = state.assistant_text[state._flushed_assistant_text_len :] + if state.session_messages: + recorded = "".join( + m.content or "" for m in state.session_messages if m.role == "assistant" + ) + if final_text.startswith(recorded): + final_text = final_text[len(recorded) :] + if final_text.strip(): + session_messages.append(ChatMessage(role="assistant", content=final_text)) + + # The final assistant message should only contain round-2 text, + # not the round-1 text that was already flushed mid-loop. 
+ assistant_msgs = [m for m in session_messages if m.role == "assistant"] + # Round-1 structured assistant (from mid-loop flush) + assert assistant_msgs[0].content == "calling search" + assert assistant_msgs[0].tool_calls is not None + # Round-2 final text (from finally block) + assert assistant_msgs[1].content == "final answer" + assert assistant_msgs[1].tool_calls is None + # Crucially: only 2 assistant messages, not 3 (no duplicate) + assert len(assistant_msgs) == 2 From db9eb2913801dfee18c36b987cbd1878d3550431 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 10:13:45 +0000 Subject: [PATCH 22/30] fix(backend): address review findings for pending-message endpoint - Fix off-by-one in rate limit: use >= instead of > for call count check - Move track_user_message() after push_pending_message() so analytics only fires on successful push - Add logger.warning in rate-limiter except-Exception catch instead of silent pass - Use fullmatch instead of match for UUID regex validation - Add extra="forbid" to PendingMessageContext to reject unexpected fields --- .../backend/api/features/chat/routes.py | 20 +++++++++---------- .../backend/copilot/pending_messages.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 7d36fa2485..7b46690bb3 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -133,7 +133,7 @@ async def _resolve_workspace_files( Used by both the stream and pending-message endpoints to prevent callers from referencing other users' files. """ - valid_ids = [fid for fid in file_ids if _UUID_RE.match(fid)] + valid_ids = [fid for fid in file_ids if _UUID_RE.fullmatch(fid)] if not valid_ids: return [] workspace = await get_or_create_workspace(user_id) @@ -1172,7 +1172,7 @@ async def queue_pending_message( ), ) ) - if _call_count > _PENDING_CALL_LIMIT: + if _call_count >= _PENDING_CALL_LIMIT: raise HTTPException( status_code=429, detail=f"Too many pending messages: limit is {_PENDING_CALL_LIMIT} per {_PENDING_CALL_WINDOW_SECONDS}s", @@ -1180,20 +1180,14 @@ async def queue_pending_message( except HTTPException: raise except Exception: - pass # Redis failure is non-fatal; fail open - - track_user_message( - user_id=user_id, - session_id=session_id, - message_length=len(request.message), - ) + logger.warning("queue_pending_message: rate-limit check failed, failing open") # non-fatal # Sanitise file IDs to the user's own workspace so injection doesn't # surface other users' files. _resolve_workspace_files handles UUID # filtering and the workspace-scoped DB lookup. sanitized_file_ids: list[str] = [] if request.file_ids: - valid_id_count = sum(1 for fid in request.file_ids if _UUID_RE.match(fid)) + valid_id_count = sum(1 for fid in request.file_ids if _UUID_RE.fullmatch(fid)) files = await _resolve_workspace_files(user_id, request.file_ids) sanitized_file_ids = [wf.id for wf in files] if len(sanitized_file_ids) != valid_id_count: @@ -1220,6 +1214,12 @@ async def queue_pending_message( ) buffer_length = await push_pending_message(session_id, pending) + track_user_message( + user_id=user_id, + session_id=session_id, + message_length=len(request.message), + ) + # Check whether a turn is currently running for UX feedback. 
active_session = await stream_registry.get_session(session_id) turn_in_flight = bool(active_session and active_session.status == "running") diff --git a/autogpt_platform/backend/backend/copilot/pending_messages.py b/autogpt_platform/backend/backend/copilot/pending_messages.py index 0875a44046..20f673215d 100644 --- a/autogpt_platform/backend/backend/copilot/pending_messages.py +++ b/autogpt_platform/backend/backend/copilot/pending_messages.py @@ -49,7 +49,7 @@ _PENDING_TTL_SECONDS = 3600 # 1 hour — matches stream_ttl default _NOTIFY_PAYLOAD = "1" -class PendingMessageContext(BaseModel): +class PendingMessageContext(BaseModel, extra="forbid"): """Structured page context attached to a pending message.""" url: str | None = None From 7b783aa03b5b28049d70300adac3ca8816b00f17 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 11:21:23 +0000 Subject: [PATCH 23/30] fix(backend): use PendingMessageContext type in QueuePendingMessageRequest to prevent 500 Change context field from dict[str,str] to PendingMessageContext so Pydantic validates (including extra="forbid") at request parse time, returning a proper 422 instead of an unhandled ValidationError / 500 when the caller sends unexpected keys. --- .../backend/api/features/chat/routes.py | 16 ++++++------ .../frontend/src/app/api/openapi.json | 25 +++++++++++++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 7b46690bb3..638ea64272 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -181,30 +181,28 @@ class QueuePendingMessageRequest(BaseModel): model_config = ConfigDict(extra="forbid") message: str = Field(min_length=1, max_length=16_000) - context: dict[str, str] | None = Field( + context: PendingMessageContext | None = Field( default=None, - description="Optional page context: expected keys are 'url' and 'content'.", + description="Optional page context with 'url' and 'content' fields.", ) file_ids: list[str] | None = Field(default=None, max_length=20) @field_validator("context") @classmethod def _validate_context_length( - cls, v: dict[str, str] | None - ) -> dict[str, str] | None: + cls, v: PendingMessageContext | None + ) -> PendingMessageContext | None: if v is None: return v # Cap context values to prevent LLM context-window stuffing via # large page payloads (url: 2 KB, content: 32 KB). 
_URL_LIMIT = 2_000 _CONTENT_LIMIT = 32_000 - url = v.get("url", "") - if len(url) > _URL_LIMIT: + if v.url and len(v.url) > _URL_LIMIT: raise ValueError( f"context.url exceeds maximum length of {_URL_LIMIT} characters" ) - content = v.get("content", "") - if len(content) > _CONTENT_LIMIT: + if v.content and len(v.content) > _CONTENT_LIMIT: raise ValueError( f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters" ) @@ -1210,7 +1208,7 @@ async def queue_pending_message( pending = PendingMessage( content=request.message, file_ids=sanitized_file_ids, - context=PendingMessageContext(**request.context) if request.context else None, + context=request.context, ) buffer_length = await push_pending_message(session_id, pending) diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 2546df9357..2001b53f87 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -12718,6 +12718,24 @@ "required": ["providers", "pagination"], "title": "ProviderResponse" }, + "PendingMessageContext": { + "properties": { + "url": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Url", + "default": null + }, + "content": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Content", + "default": null + } + }, + "additionalProperties": false, + "type": "object", + "title": "PendingMessageContext", + "description": "Structured page context attached to a pending message." + }, "QueuePendingMessageRequest": { "properties": { "message": { @@ -12728,14 +12746,11 @@ }, "context": { "anyOf": [ - { - "additionalProperties": { "type": "string" }, - "type": "object" - }, + { "$ref": "#/components/schemas/PendingMessageContext" }, { "type": "null" } ], "title": "Context", - "description": "Optional page context: expected keys are 'url' and 'content'." + "description": "Optional page context with 'url' and 'content' fields." }, "file_ids": { "anyOf": [ From 5d7fa7c216462eba268709dc0f3ea7bc38457e85 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 11:42:06 +0000 Subject: [PATCH 24/30] fix(backend): update test to use PendingMessageContext attribute access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit context is now a PendingMessageContext object, not a dict — use .url attribute instead of ["url"] subscript. --- .../backend/backend/api/features/chat/routes_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes_test.py b/autogpt_platform/backend/backend/api/features/chat/routes_test.py index 1c2af0c5e2..401d73bea3 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes_test.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes_test.py @@ -617,7 +617,7 @@ class TestQueuePendingMessageRequest: context={"url": "https://example.com", "content": "page text"}, ) assert req.context is not None - assert req.context["url"] == "https://example.com" + assert req.context.url == "https://example.com" def test_rejects_context_url_over_limit(self) -> None: import pydantic From f3f598daa3fe824979a24aa069d8f0519491fd95 Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 12:10:05 +0000 Subject: [PATCH 25/30] Wrap mid-loop drain_pending_messages in try/except If the Redis drain fails mid-tool-loop, log a warning and treat it as no pending messages rather than crashing the entire copilot turn. 
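Worth noting why "treat as no pending" is safe here: the drain is a single atomic LPOP, so a failed round-trip almost always means nothing was popped, and the messages simply stay in the Redis list under their 1h TTL until the next mid-loop round or turn start drains them. Later patches in this series apply the same guard to the SDK and baseline turn-start drains, so a shared fail-soft wrapper could consolidate the pattern. A sketch (the wrapper name is made up):

    async def drain_or_empty(session_id: str) -> list[PendingMessage]:
        try:
            return await drain_pending_messages(session_id)
        except Exception:
            # The buffer is still intact and will be drained on the next
            # attempt, so degrading to "no pending" loses no messages.
            logger.warning(
                "drain_pending_messages failed for session %s",
                session_id,
                exc_info=True,
            )
            return []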
--- .../backend/backend/copilot/baseline/service.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 9a32b6fc65..05ece25fe0 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -1289,7 +1289,15 @@ async def stream_chat_completion_baseline( ) if is_final_yield: continue - pending = await drain_pending_messages(session_id) + try: + pending = await drain_pending_messages(session_id) + except Exception: + logger.warning( + "Mid-loop drain_pending_messages failed for session %s", + session_id, + exc_info=True, + ) + pending = [] if pending: # Flush any buffered assistant/tool messages from completed # rounds into session.messages BEFORE appending the pending From 057412ebee31b61b192bd627e18e116c7e040f4b Mon Sep 17 00:00:00 2001 From: majdyz Date: Sun, 12 Apr 2026 23:14:54 +0000 Subject: [PATCH 26/30] fix(copilot): allow exactly 30 pending calls per window Change >= to > so the 30th call (INCR returns 30) is accepted and only the 31st triggers the 429. --- autogpt_platform/backend/backend/api/features/chat/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 638ea64272..af3753865c 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -1170,7 +1170,7 @@ async def queue_pending_message( ), ) ) - if _call_count >= _PENDING_CALL_LIMIT: + if _call_count > _PENDING_CALL_LIMIT: raise HTTPException( status_code=429, detail=f"Too many pending messages: limit is {_PENDING_CALL_LIMIT} per {_PENDING_CALL_WINDOW_SECONDS}s", From 10980f3799ccec90fa5f964640a368b48aa4624b Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 03:57:54 +0000 Subject: [PATCH 27/30] fix(copilot): wrap SDK turn-start drain in try/except, deduplicate format calls, elevate context length constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - sdk/service.py: wrap drain_pending_messages at turn start in try/except; a transient Redis error no longer kills the entire turn (baseline mid-loop drain was already protected, SDK was missed in round 5) - baseline/service.py: pre-compute format_pending_as_user_message content once per drained message and reuse it for both session.messages and transcript_builder — eliminates the redundant second call per message - routes.py: move _URL_LIMIT/_CONTENT_LIMIT out of the validator body into module-level _CONTEXT_URL_MAX_LENGTH/_CONTEXT_CONTENT_MAX_LENGTH so the contract limits are visible to tooling without reading the implementation --- .../backend/api/features/chat/routes.py | 18 +++++++++++------- .../backend/copilot/baseline/service.py | 14 +++++++++----- .../backend/backend/copilot/sdk/service.py | 10 +++++++++- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index af3753865c..92bbf85652 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -99,6 +99,11 @@ _PENDING_CALL_LIMIT = 30 # pushes per minute per user _PENDING_CALL_WINDOW_SECONDS 
= 60 _PENDING_CALL_KEY_PREFIX = "copilot:pending:calls:" +# Maximum lengths for pending-message context fields (url: 2 KB, content: 32 KB). +# Enforced by QueuePendingMessageRequest._validate_context_length. +_CONTEXT_URL_MAX_LENGTH = 2_000 +_CONTEXT_CONTENT_MAX_LENGTH = 32_000 + # Lua script for atomic INCR + conditional EXPIRE. # Using a single EVAL ensures the counter never persists without a TTL — # a bare INCR followed by a separate EXPIRE can leave the key without @@ -195,16 +200,15 @@ class QueuePendingMessageRequest(BaseModel): if v is None: return v # Cap context values to prevent LLM context-window stuffing via - # large page payloads (url: 2 KB, content: 32 KB). - _URL_LIMIT = 2_000 - _CONTENT_LIMIT = 32_000 - if v.url and len(v.url) > _URL_LIMIT: + # large page payloads. Limits are module-level constants so + # they are visible to callers and documentation. + if v.url and len(v.url) > _CONTEXT_URL_MAX_LENGTH: raise ValueError( - f"context.url exceeds maximum length of {_URL_LIMIT} characters" + f"context.url exceeds maximum length of {_CONTEXT_URL_MAX_LENGTH} characters" ) - if v.content and len(v.content) > _CONTENT_LIMIT: + if v.content and len(v.content) > _CONTEXT_CONTENT_MAX_LENGTH: raise ValueError( - f"context.content exceeds maximum length of {_CONTENT_LIMIT} characters" + f"context.content exceeds maximum length of {_CONTEXT_CONTENT_MAX_LENGTH} characters" ) return v diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 05ece25fe0..224757556d 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -949,6 +949,10 @@ async def stream_chat_completion_baseline( # concurrent push lands *after* the drain and stays queued for the # next turn instead of being lost. drained_at_start = await drain_pending_messages(session_id) + # Pre-compute formatted content once per message so we don't call + # format_pending_as_user_message twice (once for session.messages and + # once for transcript_builder below). + drained_at_start_content: list[str] = [] if drained_at_start: logger.info( "[Baseline] Draining %d pending message(s) at turn start for session %s", @@ -957,6 +961,7 @@ async def stream_chat_completion_baseline( ) for pm in drained_at_start: content = format_pending_as_user_message(pm)["content"] + drained_at_start_content.append(content) # Append directly — pending messages are atomically-popped from # Redis and are never stale-cache duplicates, so the # maybe_append_user_message dedup is wrong here. @@ -1043,11 +1048,10 @@ async def stream_chat_completion_baseline( # transcript — otherwise the loaded prior transcript would be # missing them and a mid-turn upload could leave a malformed # assistant-after-assistant structure on the next turn. - if drained_at_start: - for pm in drained_at_start: - transcript_builder.append_user( - content=format_pending_as_user_message(pm)["content"] - ) + # Reuse the pre-computed content strings to avoid calling + # format_pending_as_user_message a second time. 
+ for _drained_content in drained_at_start_content: + transcript_builder.append_user(content=_drained_content) # Generate title for new sessions if is_user_message and not session.title: diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 88c41f4c51..23dcee83e5 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -2325,7 +2325,15 @@ async def stream_chat_completion_sdk( # immediately after the drain so a crash doesn't lose the messages. # The endpoint deliberately does NOT persist to session.messages — # Redis is the single source of truth until this drain runs. - pending_at_start = await drain_pending_messages(session_id) + try: + pending_at_start = await drain_pending_messages(session_id) + except Exception: + logger.warning( + "%s drain_pending_messages failed at turn start, skipping", + log_prefix, + exc_info=True, + ) + pending_at_start = [] if pending_at_start: logger.info( "%s Draining %d pending message(s) at turn start", From 45f96d5769a75ea3390d416f4baf0ced21a906d5 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 04:24:29 +0000 Subject: [PATCH 28/30] fix(copilot): wrap baseline turn-start drain in try/except; add 404/429 to OpenAPI spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Baseline turn-start drain_pending_messages was unprotected — a transient Redis error would propagate up and kill the entire turn stream, unlike the already-protected mid-loop and SDK paths. Wrap with try/except + fallback to [] so a Redis hiccup degrades gracefully. Also adds 404 (session not found) and 429 (rate-limit exceeded) response codes to the pending endpoint's OpenAPI spec so TypeScript clients can handle these error paths correctly. --- .../backend/backend/copilot/baseline/service.py | 9 ++++++++- autogpt_platform/frontend/src/app/api/openapi.json | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/baseline/service.py b/autogpt_platform/backend/backend/copilot/baseline/service.py index 224757556d..ad54b20f97 100644 --- a/autogpt_platform/backend/backend/copilot/baseline/service.py +++ b/autogpt_platform/backend/backend/copilot/baseline/service.py @@ -948,7 +948,14 @@ async def stream_chat_completion_baseline( # mid-loop drains missed them). Atomic LPOP guarantees that a # concurrent push lands *after* the drain and stays queued for the # next turn instead of being lost. - drained_at_start = await drain_pending_messages(session_id) + try: + drained_at_start = await drain_pending_messages(session_id) + except Exception: + logger.warning( + "[Baseline] drain_pending_messages failed at turn start, skipping", + exc_info=True, + ) + drained_at_start = [] # Pre-compute formatted content once per message so we don't call # format_pending_as_user_message twice (once for session.messages and # once for transcript_builder below). 
diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 2001b53f87..49d8ab64a0 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -1644,6 +1644,7 @@ "401": { "$ref": "#/components/responses/HTTP401NotAuthenticatedError" }, + "404": { "description": "Session not found or access denied" }, "422": { "description": "Validation Error", "content": { @@ -1651,7 +1652,8 @@ "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } - } + }, + "429": { "description": "Token rate-limit or call-frequency cap exceeded" } } } }, From 6ccb44e0d55b406e6fa70e71317c646778af7aa2 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 07:04:07 +0000 Subject: [PATCH 29/30] fix(copilot): add 404/429 to route decorator, reformat routes.py, regenerate openapi.json Add responses={404, 429} to the pending endpoint's @router.post decorator so FastAPI auto-generates them in the OpenAPI spec. Previously these were only manually added to openapi.json and the CI schema-check (export + diff) stripped them. Also apply black formatting to the long warning line that was failing the backend lint check. --- .../backend/api/features/chat/routes.py | 8 +++- .../frontend/src/app/api/openapi.json | 48 ++++++++----------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index 92bbf85652..023e14f3dc 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -1114,6 +1114,10 @@ async def stream_chat_post( "/sessions/{session_id}/messages/pending", response_model=QueuePendingMessageResponse, status_code=202, + responses={ + 404: {"description": "Session not found or access denied"}, + 429: {"description": "Token rate-limit or call-frequency cap exceeded"}, + }, ) async def queue_pending_message( session_id: str, @@ -1182,7 +1186,9 @@ async def queue_pending_message( except HTTPException: raise except Exception: - logger.warning("queue_pending_message: rate-limit check failed, failing open") # non-fatal + logger.warning( + "queue_pending_message: rate-limit check failed, failing open" + ) # non-fatal # Sanitise file IDs to the user's own workspace so injection doesn't # surface other users' files. 
_resolve_workspace_files handles UUID diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 49d8ab64a0..9d0d9a6e8c 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -1653,7 +1653,9 @@ } } }, - "429": { "description": "Token rate-limit or call-frequency cap exceeded" } + "429": { + "description": "Token rate-limit or call-frequency cap exceeded" + } } } }, @@ -9487,14 +9489,7 @@ }, "CreditTransactionType": { "type": "string", - "enum": [ - "TOP_UP", - "USAGE", - "GRANT", - "REFUND", - "CARD_CHECK", - "SUBSCRIPTION" - ], + "enum": ["TOP_UP", "USAGE", "GRANT", "REFUND", "CARD_CHECK"], "title": "CreditTransactionType" }, "DeleteFileResponse": { @@ -12176,6 +12171,22 @@ "title": "PendingHumanReviewModel", "description": "Response model for pending human review data.\n\nRepresents a human review request that is awaiting user action.\nContains all necessary information for a user to review and approve\nor reject data from a Human-in-the-Loop block execution.\n\nAttributes:\n id: Unique identifier for the review record\n user_id: ID of the user who must perform the review\n node_exec_id: ID of the node execution that created this review\n node_id: ID of the node definition (for grouping reviews from same node)\n graph_exec_id: ID of the graph execution containing the node\n graph_id: ID of the graph template being executed\n graph_version: Version number of the graph template\n payload: The actual data payload awaiting review\n instructions: Instructions or message for the reviewer\n editable: Whether the reviewer can edit the data\n status: Current review status (WAITING, APPROVED, or REJECTED)\n review_message: Optional message from the reviewer\n created_at: Timestamp when review was created\n updated_at: Timestamp when review was last modified\n reviewed_at: Timestamp when review was completed (if applicable)" }, + "PendingMessageContext": { + "properties": { + "url": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Url" + }, + "content": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Content" + } + }, + "additionalProperties": false, + "type": "object", + "title": "PendingMessageContext", + "description": "Structured page context attached to a pending message." + }, "PlatformCostDashboard": { "properties": { "by_provider": { @@ -12720,24 +12731,6 @@ "required": ["providers", "pagination"], "title": "ProviderResponse" }, - "PendingMessageContext": { - "properties": { - "url": { - "anyOf": [{ "type": "string" }, { "type": "null" }], - "title": "Url", - "default": null - }, - "content": { - "anyOf": [{ "type": "string" }, { "type": "null" }], - "title": "Content", - "default": null - } - }, - "additionalProperties": false, - "type": "object", - "title": "PendingMessageContext", - "description": "Structured page context attached to a pending message." - }, "QueuePendingMessageRequest": { "properties": { "message": { @@ -12751,7 +12744,6 @@ { "$ref": "#/components/schemas/PendingMessageContext" }, { "type": "null" } ], - "title": "Context", "description": "Optional page context with 'url' and 'content' fields." 
}, "file_ids": { From ca0c95b5936e9d33e5d5f39b576bd3fa791f8732 Mon Sep 17 00:00:00 2001 From: majdyz Date: Mon, 13 Apr 2026 07:13:21 +0000 Subject: [PATCH 30/30] fix(frontend): add SUBSCRIPTION to CreditTransactionType enum in openapi.json Syncs the OpenAPI spec with the Prisma schema which already includes the SUBSCRIPTION enum value in CreditTransactionType. --- autogpt_platform/frontend/src/app/api/openapi.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 9d0d9a6e8c..1b3b1b75f2 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -9489,7 +9489,14 @@ }, "CreditTransactionType": { "type": "string", - "enum": ["TOP_UP", "USAGE", "GRANT", "REFUND", "CARD_CHECK"], + "enum": [ + "TOP_UP", + "USAGE", + "GRANT", + "REFUND", + "CARD_CHECK", + "SUBSCRIPTION" + ], "title": "CreditTransactionType" }, "DeleteFileResponse": {