mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(backend/copilot): use CLI session file for transcript upload to preserve compaction
The TranscriptBuilder accumulates all raw SDK stream messages, including pre-compaction content. When the CLI compacts mid-stream, the uploaded transcript still contains the full uncompacted messages, causing "Prompt is too long" errors on the next --resume turn. Fix: (1) read the CLI's own session file (~/.claude/projects/<encoded_cwd>/*.jsonl), which reflects mid-stream compaction, instead of the TranscriptBuilder output, falling back to TranscriptBuilder only when that file is not available; (2) extract a _cli_project_dir() helper and refactor cleanup_cli_project_dir to use it; (3) on a "Prompt is too long" error with --resume, delete the oversized transcript so the next turn falls back to compression-based context.
This commit is contained in:
@@ -76,7 +76,9 @@ from .tool_adapter import (
|
||||
)
|
||||
from .transcript import (
|
||||
cleanup_cli_project_dir,
|
||||
delete_transcript,
|
||||
download_transcript,
|
||||
read_cli_session_file,
|
||||
upload_transcript,
|
||||
validate_transcript,
|
||||
write_transcript_to_tempfile,
|
||||
@@ -1045,6 +1047,35 @@ async def stream_chat_completion_sdk(
|
||||
exc_info=True,
|
||||
)
|
||||
ended_with_stream_error = True
|
||||
|
||||
# "Prompt is too long" means the resumed transcript
|
||||
# exceeded the model's context window. Delete the
|
||||
# oversized transcript so the next turn falls back to
|
||||
# the compression-based _build_query_message path.
|
||||
err_str = str(stream_err)
|
||||
if (
|
||||
"prompt is too long" in err_str.lower()
|
||||
and use_resume
|
||||
and user_id
|
||||
):
|
||||
logger.warning(
|
||||
"%s Prompt too long with --resume — deleting "
|
||||
"oversized transcript for session %s",
|
||||
log_prefix,
|
||||
session_id,
|
||||
)
|
||||
try:
|
||||
await delete_transcript(user_id, session_id)
|
||||
except Exception as del_err:
|
||||
logger.warning(
|
||||
"%s Failed to delete oversized transcript: %s",
|
||||
log_prefix,
|
||||
del_err,
|
||||
)
|
||||
# Prevent the finally block from re-uploading the
|
||||
# same oversized transcript.
|
||||
use_resume = False
|
||||
|
||||
yield StreamError(
|
||||
errorText=f"SDK stream error: {stream_err}",
|
||||
code="sdk_stream_error",
|
||||
@@ -1425,10 +1456,33 @@ async def stream_chat_completion_sdk(
|
||||
# This MUST run in finally so the transcript is uploaded even when
|
||||
# the streaming loop raises an exception.
|
||||
# The transcript represents the COMPLETE active context (atomic).
|
||||
if config.claude_agent_use_resume and user_id and session is not None:
|
||||
if (
|
||||
config.claude_agent_use_resume
|
||||
and use_resume
|
||||
and user_id
|
||||
and session is not None
|
||||
):
|
||||
try:
|
||||
# Build complete transcript from captured SDK messages
|
||||
transcript_content = transcript_builder.to_jsonl()
|
||||
# Prefer the CLI's own session file — it reflects any
|
||||
# mid-stream compaction the CLI performed. Fall back to
|
||||
# TranscriptBuilder output when the CLI file isn't available
|
||||
# (e.g. the process was killed before writing it).
|
||||
cli_transcript = read_cli_session_file(sdk_cwd) if sdk_cwd else None
|
||||
if cli_transcript:
|
||||
transcript_content = cli_transcript
|
||||
logger.info(
|
||||
"%s Using CLI session file for transcript upload " "(%d bytes)",
|
||||
log_prefix,
|
||||
len(cli_transcript),
|
||||
)
|
||||
else:
|
||||
transcript_content = transcript_builder.to_jsonl()
|
||||
logger.info(
|
||||
"%s CLI session file not available, using "
|
||||
"TranscriptBuilder (%d bytes)",
|
||||
log_prefix,
|
||||
len(transcript_content) if transcript_content else 0,
|
||||
)
|
||||
|
||||
if not transcript_content:
|
||||
logger.warning(
|
||||
|
||||
@@ -137,6 +137,59 @@ def _sanitize_id(raw_id: str, max_len: int = 36) -> str:
|
||||
_SAFE_CWD_PREFIX = os.path.realpath("/tmp/copilot-")
|
||||
|
||||
|
||||
def _cli_project_dir(sdk_cwd: str) -> str | None:
|
||||
"""Return the CLI's project directory for a given working directory.
|
||||
|
||||
Returns ``None`` if the path would escape the projects base.
|
||||
"""
|
||||
cwd_encoded = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
config_dir = os.environ.get("CLAUDE_CONFIG_DIR") or os.path.expanduser("~/.claude")
|
||||
projects_base = os.path.realpath(os.path.join(config_dir, "projects"))
|
||||
project_dir = os.path.realpath(os.path.join(projects_base, cwd_encoded))
|
||||
|
||||
if not project_dir.startswith(projects_base + os.sep):
|
||||
logger.warning(f"[Transcript] Project dir escaped projects base: {project_dir}")
|
||||
return None
|
||||
return project_dir
|
||||
|
||||
|
||||
def read_cli_session_file(sdk_cwd: str) -> str | None:
    """Read the CLI's own session file, which reflects any compaction.

    The CLI writes its session transcript to
    ``~/.claude/projects/<encoded_cwd>/<session_id>.jsonl``.
    Since each SDK turn uses a unique ``sdk_cwd``, there should be
    exactly one ``.jsonl`` file in that directory; if several are present,
    the most recently modified one is used.

    Args:
        sdk_cwd: Working directory the CLI was run in for this turn.

    Returns:
        The file content decoded as UTF-8, or ``None`` if the project
        directory or session file is missing, or the file cannot be read
        or decoded.
    """
    import glob

    project_dir = _cli_project_dir(sdk_cwd)
    if not project_dir or not os.path.isdir(project_dir):
        return None

    # Escape the directory part so glob metacharacters in the config path
    # (e.g. "[" in CLAUDE_CONFIG_DIR) are matched literally.
    jsonl_files = glob.glob(os.path.join(glob.escape(project_dir), "*.jsonl"))
    if not jsonl_files:
        logger.debug("[Transcript] No CLI session file found in %s", project_dir)
        return None

    try:
        # Pick the most recently modified file (should be only one per turn).
        # getmtime stays inside the try: a file can vanish between the glob
        # and the stat, which raises OSError.
        session_file = max(jsonl_files, key=os.path.getmtime)
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(session_file, encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        logger.warning("[Transcript] Failed to read CLI session file: %s", e)
        return None

    logger.info(
        "[Transcript] Read CLI session file: %s (%d bytes)",
        session_file,
        len(content),
    )
    return content
|
||||
|
||||
|
||||
def cleanup_cli_project_dir(sdk_cwd: str) -> None:
|
||||
"""Remove the CLI's project directory for a specific working directory.
|
||||
|
||||
@@ -146,16 +199,8 @@ def cleanup_cli_project_dir(sdk_cwd: str) -> None:
|
||||
"""
|
||||
import shutil
|
||||
|
||||
# Encode cwd the same way CLI does (replaces non-alphanumeric with -)
|
||||
cwd_encoded = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
config_dir = os.environ.get("CLAUDE_CONFIG_DIR") or os.path.expanduser("~/.claude")
|
||||
projects_base = os.path.realpath(os.path.join(config_dir, "projects"))
|
||||
project_dir = os.path.realpath(os.path.join(projects_base, cwd_encoded))
|
||||
|
||||
if not project_dir.startswith(projects_base + os.sep):
|
||||
logger.warning(
|
||||
f"[Transcript] Cleanup path escaped projects base: {project_dir}"
|
||||
)
|
||||
project_dir = _cli_project_dir(sdk_cwd)
|
||||
if not project_dir:
|
||||
return
|
||||
|
||||
if os.path.isdir(project_dir):
|
||||
|
||||
Reference in New Issue
Block a user