mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-03-17 03:00:27 -04:00
Compare commits
2 Commits
ubbe/tailw
...
fix/transc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3cfe2f384a | ||
|
|
fea6711ae7 |
@@ -1408,46 +1408,11 @@ async def stream_chat_completion_sdk(
|
||||
) and not has_appended_assistant:
|
||||
session.messages.append(assistant_response)
|
||||
|
||||
# --- Upload transcript for next-turn --resume ---
|
||||
# After async with the SDK task group has exited, so the Stop
|
||||
# hook has already fired and the CLI has been SIGTERMed. The
|
||||
# CLI uses appendFileSync, so all writes are safely on disk.
|
||||
if config.claude_agent_use_resume and user_id:
|
||||
# With --resume the CLI appends to the resume file (most
|
||||
# complete). Otherwise use the Stop hook path.
|
||||
if use_resume and resume_file:
|
||||
raw_transcript = read_transcript_file(resume_file)
|
||||
logger.debug("[SDK] Transcript source: resume file")
|
||||
elif captured_transcript.path:
|
||||
raw_transcript = read_transcript_file(captured_transcript.path)
|
||||
logger.debug(
|
||||
"[SDK] Transcript source: stop hook (%s), read result: %s",
|
||||
captured_transcript.path,
|
||||
f"{len(raw_transcript)}B" if raw_transcript else "None",
|
||||
)
|
||||
else:
|
||||
raw_transcript = None
|
||||
|
||||
if not raw_transcript:
|
||||
logger.debug(
|
||||
"[SDK] No usable transcript — CLI file had no "
|
||||
"conversation entries (expected for first turn "
|
||||
"without --resume)"
|
||||
)
|
||||
|
||||
if raw_transcript:
|
||||
# Shield the upload from generator cancellation so a
|
||||
# client disconnect / page refresh doesn't lose the
|
||||
# transcript. The upload must finish even if the SSE
|
||||
# connection is torn down.
|
||||
await asyncio.shield(
|
||||
_try_upload_transcript(
|
||||
user_id,
|
||||
session_id,
|
||||
raw_transcript,
|
||||
message_count=len(session.messages),
|
||||
)
|
||||
)
|
||||
# Transcript upload is handled in the finally block below — it
|
||||
# correctly prefers the stop hook content (new turn data) over the
|
||||
# resume file (old downloaded data). Uploading here would write
|
||||
# stale data that the finally block then overwrites with potentially
|
||||
# smaller (but newer) stop hook content.
|
||||
|
||||
logger.info(
|
||||
"[SDK] [%s] Stream completed successfully with %d messages",
|
||||
|
||||
@@ -331,10 +331,10 @@ async def upload_transcript(
|
||||
) -> None:
|
||||
"""Strip progress entries and upload transcript to bucket storage.
|
||||
|
||||
Safety: only overwrites when the new (stripped) transcript is larger than
|
||||
what is already stored. Since JSONL is append-only, the latest transcript
|
||||
is always the longest. This prevents a slow/stale background task from
|
||||
clobbering a newer upload from a concurrent turn.
|
||||
The executor holds a cluster lock per session, so concurrent uploads for
|
||||
the same session cannot happen. We always overwrite — with ``--resume``
|
||||
the CLI may compact old tool results, so neither byte size nor line count
|
||||
is a reliable proxy for "newer".
|
||||
|
||||
Args:
|
||||
message_count: ``len(session.messages)`` at upload time — used by
|
||||
@@ -353,33 +353,16 @@ async def upload_transcript(
|
||||
storage = await get_workspace_storage()
|
||||
wid, fid, fname = _storage_path_parts(user_id, session_id)
|
||||
encoded = stripped.encode("utf-8")
|
||||
new_size = len(encoded)
|
||||
|
||||
# Check existing transcript size to avoid overwriting newer with older
|
||||
path = _build_storage_path(user_id, session_id, storage)
|
||||
content_skipped = False
|
||||
try:
|
||||
existing = await storage.retrieve(path)
|
||||
if len(existing) >= new_size:
|
||||
logger.info(
|
||||
f"[Transcript] Skipping content upload — existing ({len(existing)}B) "
|
||||
f">= new ({new_size}B) for session {session_id}"
|
||||
)
|
||||
content_skipped = True
|
||||
except (FileNotFoundError, Exception):
|
||||
pass # No existing transcript or retrieval error — proceed with upload
|
||||
await storage.store(
|
||||
workspace_id=wid,
|
||||
file_id=fid,
|
||||
filename=fname,
|
||||
content=encoded,
|
||||
)
|
||||
|
||||
if not content_skipped:
|
||||
await storage.store(
|
||||
workspace_id=wid,
|
||||
file_id=fid,
|
||||
filename=fname,
|
||||
content=encoded,
|
||||
)
|
||||
|
||||
# Always update metadata (even when content is skipped) so message_count
|
||||
# stays current. The gap-fill logic in _build_query_message relies on
|
||||
# message_count to avoid re-compressing the same messages every turn.
|
||||
# Update metadata so message_count stays current. The gap-fill logic
|
||||
# in _build_query_message relies on it to avoid re-compressing messages.
|
||||
try:
|
||||
meta = {"message_count": message_count, "uploaded_at": time.time()}
|
||||
mwid, mfid, mfname = _meta_storage_path_parts(user_id, session_id)
|
||||
@@ -393,9 +376,8 @@ async def upload_transcript(
|
||||
logger.warning(f"[Transcript] Failed to write metadata for {session_id}: {e}")
|
||||
|
||||
logger.info(
|
||||
f"[Transcript] Uploaded {new_size}B "
|
||||
f"(stripped from {len(content)}B, msg_count={message_count}, "
|
||||
f"content_skipped={content_skipped}) "
|
||||
f"[Transcript] Uploaded {len(encoded)}B "
|
||||
f"(stripped from {len(content)}B, msg_count={message_count}) "
|
||||
f"for session {session_id}"
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user