Mirror of https://github.com/Significant-Gravitas/AutoGPT.git (synced 2026-02-14 00:35:02 -05:00)
Replace the sdkTranscript TEXT column with WorkspaceStorageBackend
(GCS/local) for persisting Claude Code JSONL transcripts. This removes
the implicit 512KB cap that caused --resume to degrade after a few
tool-heavy turns (JSONL is append-only and never shrinks).
Key changes:
- Strip progress/metadata entries before storing (~30% size reduction)
with parentUuid reparenting for orphaned children
- Upload in background (asyncio.create_task) to avoid blocking SSE
- Size-based conflict guard: never overwrite a larger (newer) transcript
- Validate stripped content before upload
- Log a warning when falling back to the compression approach
- Enable claude_agent_use_resume by default
- Remove sdkTranscript column from schema, model, and DB layer
- Storage path: chat-transcripts/{user_id}/{session_id}/{session_id}.jsonl
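A minimal sketch of how these pieces could fit together on the save path, reusing the strip/validate helpers exercised by the tests below. The save_transcript function and the storage.get_size/storage.write methods are illustrative assumptions, not the actual WorkspaceStorageBackend API; only the helper imports and the storage path format come from this change.

import logging

from backend.api.features.chat.sdk.transcript import (
    strip_progress_entries,
    validate_transcript,
)

logger = logging.getLogger(__name__)


async def save_transcript(storage, user_id: str, session_id: str, raw_jsonl: str) -> None:
    """Hypothetical save path: strip, validate, then upload behind a size-based conflict guard."""
    # Drop progress/metadata entries and reparent orphaned children before storing.
    stripped = strip_progress_entries(raw_jsonl)
    if not validate_transcript(stripped):
        logger.warning("Stripped transcript failed validation; skipping upload")
        return
    path = f"chat-transcripts/{user_id}/{session_id}/{session_id}.jsonl"
    # Assumed backend methods (get_size/write): never overwrite a larger (newer) transcript.
    existing_size = await storage.get_size(path)
    if existing_size is not None and existing_size > len(stripped.encode("utf-8")):
        return
    await storage.write(path, stripped.encode("utf-8"))


# Launched in the background so the upload never blocks SSE streaming, e.g.:
# asyncio.create_task(save_transcript(storage, user_id, session_id, raw_jsonl))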
230 lines · 8.3 KiB · Python
"""Unit tests for JSONL transcript management utilities."""
|
|
|
|
import json
|
|
import os
|
|
|
|
from backend.api.features.chat.sdk.transcript import (
|
|
STRIPPABLE_TYPES,
|
|
read_transcript_file,
|
|
strip_progress_entries,
|
|
validate_transcript,
|
|
write_transcript_to_tempfile,
|
|
)
|
|
|
|
|
|
def _make_jsonl(*entries: dict) -> str:
|
|
return "\n".join(json.dumps(e) for e in entries) + "\n"
|
|
|
|
|
|
# --- Fixtures ---
|
|
|
|
|
|
METADATA_LINE = {"type": "queue-operation", "subtype": "create"}
|
|
FILE_HISTORY = {"type": "file-history-snapshot", "files": []}
|
|
USER_MSG = {"type": "user", "uuid": "u1", "message": {"role": "user", "content": "hi"}}
|
|
ASST_MSG = {
|
|
"type": "assistant",
|
|
"uuid": "a1",
|
|
"parentUuid": "u1",
|
|
"message": {"role": "assistant", "content": "hello"},
|
|
}
|
|
PROGRESS_ENTRY = {
|
|
"type": "progress",
|
|
"uuid": "p1",
|
|
"parentUuid": "u1",
|
|
"data": {"type": "bash_progress", "stdout": "running..."},
|
|
}
|
|
|
|
VALID_TRANSCRIPT = _make_jsonl(METADATA_LINE, FILE_HISTORY, USER_MSG, ASST_MSG)
|
|
|
|
|
|
# --- read_transcript_file ---
|
|
|
|
|
|
class TestReadTranscriptFile:
|
|
def test_returns_content_for_valid_file(self, tmp_path):
|
|
path = tmp_path / "session.jsonl"
|
|
path.write_text(VALID_TRANSCRIPT)
|
|
result = read_transcript_file(str(path))
|
|
assert result is not None
|
|
assert "user" in result
|
|
|
|
def test_returns_none_for_missing_file(self):
|
|
assert read_transcript_file("/nonexistent/path.jsonl") is None
|
|
|
|
def test_returns_none_for_empty_path(self):
|
|
assert read_transcript_file("") is None
|
|
|
|
def test_returns_none_for_empty_file(self, tmp_path):
|
|
path = tmp_path / "empty.jsonl"
|
|
path.write_text("")
|
|
assert read_transcript_file(str(path)) is None
|
|
|
|
def test_returns_none_for_metadata_only(self, tmp_path):
|
|
content = _make_jsonl(METADATA_LINE, FILE_HISTORY)
|
|
path = tmp_path / "meta.jsonl"
|
|
path.write_text(content)
|
|
assert read_transcript_file(str(path)) is None
|
|
|
|
def test_returns_none_for_invalid_json(self, tmp_path):
|
|
path = tmp_path / "bad.jsonl"
|
|
path.write_text("not json\n{}\n{}\n")
|
|
assert read_transcript_file(str(path)) is None
|
|
|
|
def test_no_size_limit(self, tmp_path):
|
|
"""Large files are accepted — bucket storage has no size limit."""
|
|
big_content = {"type": "user", "uuid": "u9", "data": "x" * 1_000_000}
|
|
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, big_content, ASST_MSG)
|
|
path = tmp_path / "big.jsonl"
|
|
path.write_text(content)
|
|
result = read_transcript_file(str(path))
|
|
assert result is not None
|
|
|
|
|
|
# --- write_transcript_to_tempfile ---
|
|
|
|
|
|
class TestWriteTranscriptToTempfile:
|
|
def test_writes_file_and_returns_path(self, tmp_path):
|
|
cwd = str(tmp_path / "workspace")
|
|
result = write_transcript_to_tempfile(VALID_TRANSCRIPT, "sess-1234-abcd", cwd)
|
|
assert result is not None
|
|
assert os.path.isfile(result)
|
|
assert result.endswith(".jsonl")
|
|
with open(result) as f:
|
|
assert f.read() == VALID_TRANSCRIPT
|
|
|
|
def test_creates_parent_directory(self, tmp_path):
|
|
cwd = str(tmp_path / "new" / "dir")
|
|
result = write_transcript_to_tempfile(VALID_TRANSCRIPT, "sess-1234", cwd)
|
|
assert result is not None
|
|
assert os.path.isdir(cwd)
|
|
|
|
def test_uses_session_id_prefix(self, tmp_path):
|
|
cwd = str(tmp_path)
|
|
result = write_transcript_to_tempfile(VALID_TRANSCRIPT, "abcdef12-rest", cwd)
|
|
assert result is not None
|
|
assert "abcdef12" in os.path.basename(result)
|
|
|
|
|
|
# --- validate_transcript ---
|
|
|
|
|
|
class TestValidateTranscript:
|
|
def test_valid_transcript(self):
|
|
assert validate_transcript(VALID_TRANSCRIPT) is True
|
|
|
|
def test_none_content(self):
|
|
assert validate_transcript(None) is False
|
|
|
|
def test_empty_content(self):
|
|
assert validate_transcript("") is False
|
|
|
|
def test_metadata_only(self):
|
|
content = _make_jsonl(METADATA_LINE, FILE_HISTORY)
|
|
assert validate_transcript(content) is False
|
|
|
|
def test_user_only_no_assistant(self):
|
|
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, USER_MSG)
|
|
assert validate_transcript(content) is False
|
|
|
|
def test_assistant_only_no_user(self):
|
|
content = _make_jsonl(METADATA_LINE, FILE_HISTORY, ASST_MSG)
|
|
assert validate_transcript(content) is False
|
|
|
|
def test_invalid_json_returns_false(self):
|
|
assert validate_transcript("not json\n{}\n{}\n") is False
|
|
|
|
|
|
# --- strip_progress_entries ---
|
|
|
|
|
|
class TestStripProgressEntries:
|
|
def test_strips_all_strippable_types(self):
|
|
"""All STRIPPABLE_TYPES are removed from the output."""
|
|
entries = [
|
|
USER_MSG,
|
|
{"type": "progress", "uuid": "p1", "parentUuid": "u1"},
|
|
{"type": "file-history-snapshot", "files": []},
|
|
{"type": "queue-operation", "subtype": "create"},
|
|
{"type": "summary", "text": "..."},
|
|
{"type": "pr-link", "url": "..."},
|
|
ASST_MSG,
|
|
]
|
|
result = strip_progress_entries(_make_jsonl(*entries))
|
|
result_types = {json.loads(line)["type"] for line in result.strip().split("\n")}
|
|
assert result_types == {"user", "assistant"}
|
|
for stype in STRIPPABLE_TYPES:
|
|
assert stype not in result_types
|
|
|
|
def test_reparents_children_of_stripped_entries(self):
|
|
"""An assistant message whose parent is a progress entry gets reparented."""
|
|
progress = {
|
|
"type": "progress",
|
|
"uuid": "p1",
|
|
"parentUuid": "u1",
|
|
"data": {"type": "bash_progress"},
|
|
}
|
|
asst = {
|
|
"type": "assistant",
|
|
"uuid": "a1",
|
|
"parentUuid": "p1", # Points to progress
|
|
"message": {"role": "assistant", "content": "done"},
|
|
}
|
|
content = _make_jsonl(USER_MSG, progress, asst)
|
|
result = strip_progress_entries(content)
|
|
lines = [json.loads(line) for line in result.strip().split("\n")]
|
|
|
|
asst_entry = next(e for e in lines if e["type"] == "assistant")
|
|
# Should be reparented to u1 (the user message)
|
|
assert asst_entry["parentUuid"] == "u1"
|
|
|
|
def test_reparents_through_chain(self):
|
|
"""Reparenting walks through multiple stripped entries."""
|
|
p1 = {"type": "progress", "uuid": "p1", "parentUuid": "u1"}
|
|
p2 = {"type": "progress", "uuid": "p2", "parentUuid": "p1"}
|
|
p3 = {"type": "progress", "uuid": "p3", "parentUuid": "p2"}
|
|
asst = {
|
|
"type": "assistant",
|
|
"uuid": "a1",
|
|
"parentUuid": "p3", # 3 levels deep
|
|
"message": {"role": "assistant", "content": "done"},
|
|
}
|
|
content = _make_jsonl(USER_MSG, p1, p2, p3, asst)
|
|
result = strip_progress_entries(content)
|
|
lines = [json.loads(line) for line in result.strip().split("\n")]
|
|
|
|
asst_entry = next(e for e in lines if e["type"] == "assistant")
|
|
assert asst_entry["parentUuid"] == "u1"
|
|
|
|
def test_preserves_non_strippable_entries(self):
|
|
"""User, assistant, and system entries are preserved."""
|
|
system = {"type": "system", "uuid": "s1", "message": "prompt"}
|
|
content = _make_jsonl(system, USER_MSG, ASST_MSG)
|
|
result = strip_progress_entries(content)
|
|
result_types = [json.loads(line)["type"] for line in result.strip().split("\n")]
|
|
assert result_types == ["system", "user", "assistant"]
|
|
|
|
def test_empty_input(self):
|
|
result = strip_progress_entries("")
|
|
# Should return just a newline (empty content stripped)
|
|
assert result.strip() == ""
|
|
|
|
def test_no_strippable_entries(self):
|
|
"""When there's nothing to strip, output matches input structure."""
|
|
content = _make_jsonl(USER_MSG, ASST_MSG)
|
|
result = strip_progress_entries(content)
|
|
result_lines = result.strip().split("\n")
|
|
assert len(result_lines) == 2
|
|
|
|
def test_handles_entries_without_uuid(self):
|
|
"""Entries without uuid field are handled gracefully."""
|
|
no_uuid = {"type": "queue-operation", "subtype": "create"}
|
|
content = _make_jsonl(no_uuid, USER_MSG, ASST_MSG)
|
|
result = strip_progress_entries(content)
|
|
result_types = [json.loads(line)["type"] for line in result.strip().split("\n")]
|
|
# queue-operation is strippable
|
|
assert "queue-operation" not in result_types
|
|
assert "user" in result_types
|
|
assert "assistant" in result_types
|