mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Why
AutoPilot users hit `invalid_request_error` ("thinking or
redacted_thinking blocks in the latest assistant message cannot be
modified") when sessions get long enough to trigger transcript
compaction. The Anthropic API requires thinking blocks in the last
assistant message to be byte-for-byte identical to the original response
— our compaction was flattening them to plain text, destroying the
cryptographic signatures.
Reported in Discord `#breakage` by John Ababseh with session
`31d3f08a-cb94-45eb-9fce-56b3f0287ef4`.
## What
- **`compact_transcript`** now splits the transcript into a compressible
prefix and a preserved tail (last assistant entry + trailing entries).
Only the prefix is compressed; the tail is re-appended verbatim,
preserving thinking blocks exactly.
- **`_flatten_assistant_content`** now silently drops `thinking` and
`redacted_thinking` blocks instead of creating `[__thinking__]`
placeholders — they carry no useful context for compression summaries.
- **`response_adapter`** now explicitly handles `ThinkingBlock`, skipping it
gracefully instead of letting it silently fall through the isinstance chain.
- **`_format_sdk_content_blocks`** now passes through raw dict blocks
(e.g. `redacted_thinking` that the SDK may not have a typed class for)
verbatim to the transcript.
## How
The key insight is the Anthropic API's asymmetric constraint:
- **Last assistant message**: thinking/redacted_thinking blocks must be
preserved byte-for-byte
- **Older assistant messages**: thinking blocks can be removed entirely
`compact_transcript` uses `_find_last_assistant_entry()` to split the
JSONL into two parts:
1. **Prefix** (everything before the last assistant): flattened and
compressed normally
2. **Tail** (last assistant entry + any trailing entries, e.g. a user
message): preserved verbatim and re-chained via `_rechain_tail()` to
maintain the `parentUuid` chain
This ensures the API always sees the original thinking blocks in the
last assistant message while still achieving meaningful compression on
older turns.
## Test plan
- [x] 25 new tests in `thinking_blocks_test.py` (TDD: written before
implementation)
- [x] `_find_last_assistant_entry` splits correctly at last assistant,
handles edges (no assistant, index 0, trailing user)
- [x] `_rechain_tail` patches parentUuid chain, handles empty tail
- [x] `_flatten_assistant_content` strips thinking/redacted_thinking
blocks, handles mixed content
- [x] `compact_transcript` preserves last assistant's thinking blocks
- [x] `compact_transcript` strips thinking from older assistant messages
- [x] Edge cases: trailing user message, single assistant, no thinking
blocks
- [x] `response_adapter` handles ThinkingBlock without crash
- [x] `_format_sdk_content_blocks` preserves thinking block format and
raw dict blocks
- [x] All existing copilot SDK tests pass
- [x] Pre-commit hooks (lint, format, typecheck) all pass
95 lines
3.0 KiB
Python
95 lines
3.0 KiB
Python
"""Shared test fixtures for copilot SDK tests."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import patch
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from backend.util import json
|
|
|
|
|
|
@pytest.fixture()
def mock_chat_config():
    """Swap ``ChatConfig`` for a stub so tests skip the real config lookup.

    While the fixture is active, ``backend.copilot.config.ChatConfig`` is
    patched so that calling it returns a throwaway object exposing only the
    placeholder attributes ``model``, ``api_key`` and ``base_url``. Intended
    for ``compact_transcript`` tests.
    """
    # Build the stand-in config instance up front; it only carries dummy values.
    stub_attrs = {"model": "m", "api_key": "k", "base_url": "u"}
    stub_config = type("Cfg", (), stub_attrs)()
    with patch("backend.copilot.config.ChatConfig", return_value=stub_config):
        yield
|
|
|
|
|
|
def build_test_transcript(pairs: list[tuple[str, str]]) -> str:
    """Return a minimal JSONL transcript built from (role, content) pairs.

    Convenience wrapper for copilot SDK tests that need a well-formed
    transcript without touching the real storage layer.

    The work is done by ``build_structured_transcript``; for assistant
    messages it wraps plain content strings in
    ``[{"type": "text", "text": ...}]``.
    """
    # ``list`` is invariant, so a ``list[tuple[str, str]]`` is not accepted
    # where ``list[tuple[str, str | list[dict]]]`` is expected even though
    # every element is compatible. Copy into an explicitly annotated list.
    structured_entries: list[tuple[str, str | list[dict]]] = [*pairs]
    return build_structured_transcript(structured_entries)
|
|
|
|
|
|
def build_structured_transcript(
    entries: list[tuple[str, str | list[dict]]],
) -> str:
    """Serialize (role, content) entries into a JSONL transcript string.

    ``content`` may be a plain string or a list of content-block dicts
    (e.g. thinking / tool_use / text blocks). For the ``"assistant"`` role a
    plain string is wrapped in a single text block; for every other role the
    content is stored as given and the entry type is ``"user"``.

    Each entry gets a fresh UUID and its ``parentUuid`` points at the
    previous entry (``None`` for the first one). The result is one compact
    JSON object per line, terminated by a trailing newline.

    Example::

        build_structured_transcript([
            ("user", "Hello"),
            ("assistant", [
                {"type": "thinking", "thinking": "...", "signature": "sig1"},
                {"type": "text", "text": "Hi there"},
            ]),
        ])
    """
    serialized: list[str] = []
    parent_uuid: str | None = None

    for role, content in entries:
        entry_uuid = str(uuid4())
        is_assistant = role == "assistant"

        if is_assistant:
            # Normalise to a block list so both assistant shapes share one
            # message envelope.
            blocks = (
                content
                if isinstance(content, list)
                else [{"type": "text", "text": content}]
            )
            message: dict = {
                "role": "assistant",
                "model": "claude-test",
                "id": f"msg_{entry_uuid[:8]}",
                "type": "message",
                "content": blocks,
                "stop_reason": "end_turn",
                "stop_sequence": None,
            }
        else:
            message = {"role": role, "content": content}

        record = {
            "type": "assistant" if is_assistant else "user",
            "uuid": entry_uuid,
            "parentUuid": parent_uuid,
            "message": message,
        }
        serialized.append(json.dumps(record, separators=(",", ":")))
        # The next entry chains onto this one.
        parent_uuid = entry_uuid

    return "\n".join(serialized) + "\n"
|