mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-03-17 03:00:27 -04:00
Compare commits
2 Commits
dev
...
fix/transc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3cfe2f384a | ||
|
|
fea6711ae7 |
@@ -1408,46 +1408,11 @@ async def stream_chat_completion_sdk(
|
|||||||
) and not has_appended_assistant:
|
) and not has_appended_assistant:
|
||||||
session.messages.append(assistant_response)
|
session.messages.append(assistant_response)
|
||||||
|
|
||||||
# --- Upload transcript for next-turn --resume ---
|
# Transcript upload is handled in the finally block below — it
|
||||||
# After async with the SDK task group has exited, so the Stop
|
# correctly prefers the stop hook content (new turn data) over the
|
||||||
# hook has already fired and the CLI has been SIGTERMed. The
|
# resume file (old downloaded data). Uploading here would write
|
||||||
# CLI uses appendFileSync, so all writes are safely on disk.
|
# stale data that the finally block then overwrites with potentially
|
||||||
if config.claude_agent_use_resume and user_id:
|
# smaller (but newer) stop hook content.
|
||||||
# With --resume the CLI appends to the resume file (most
|
|
||||||
# complete). Otherwise use the Stop hook path.
|
|
||||||
if use_resume and resume_file:
|
|
||||||
raw_transcript = read_transcript_file(resume_file)
|
|
||||||
logger.debug("[SDK] Transcript source: resume file")
|
|
||||||
elif captured_transcript.path:
|
|
||||||
raw_transcript = read_transcript_file(captured_transcript.path)
|
|
||||||
logger.debug(
|
|
||||||
"[SDK] Transcript source: stop hook (%s), read result: %s",
|
|
||||||
captured_transcript.path,
|
|
||||||
f"{len(raw_transcript)}B" if raw_transcript else "None",
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raw_transcript = None
|
|
||||||
|
|
||||||
if not raw_transcript:
|
|
||||||
logger.debug(
|
|
||||||
"[SDK] No usable transcript — CLI file had no "
|
|
||||||
"conversation entries (expected for first turn "
|
|
||||||
"without --resume)"
|
|
||||||
)
|
|
||||||
|
|
||||||
if raw_transcript:
|
|
||||||
# Shield the upload from generator cancellation so a
|
|
||||||
# client disconnect / page refresh doesn't lose the
|
|
||||||
# transcript. The upload must finish even if the SSE
|
|
||||||
# connection is torn down.
|
|
||||||
await asyncio.shield(
|
|
||||||
_try_upload_transcript(
|
|
||||||
user_id,
|
|
||||||
session_id,
|
|
||||||
raw_transcript,
|
|
||||||
message_count=len(session.messages),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"[SDK] [%s] Stream completed successfully with %d messages",
|
"[SDK] [%s] Stream completed successfully with %d messages",
|
||||||
|
|||||||
@@ -331,10 +331,10 @@ async def upload_transcript(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Strip progress entries and upload transcript to bucket storage.
|
"""Strip progress entries and upload transcript to bucket storage.
|
||||||
|
|
||||||
Safety: only overwrites when the new (stripped) transcript is larger than
|
The executor holds a cluster lock per session, so concurrent uploads for
|
||||||
what is already stored. Since JSONL is append-only, the latest transcript
|
the same session cannot happen. We always overwrite — with ``--resume``
|
||||||
is always the longest. This prevents a slow/stale background task from
|
the CLI may compact old tool results, so neither byte size nor line count
|
||||||
clobbering a newer upload from a concurrent turn.
|
is a reliable proxy for "newer".
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
message_count: ``len(session.messages)`` at upload time — used by
|
message_count: ``len(session.messages)`` at upload time — used by
|
||||||
@@ -353,33 +353,16 @@ async def upload_transcript(
|
|||||||
storage = await get_workspace_storage()
|
storage = await get_workspace_storage()
|
||||||
wid, fid, fname = _storage_path_parts(user_id, session_id)
|
wid, fid, fname = _storage_path_parts(user_id, session_id)
|
||||||
encoded = stripped.encode("utf-8")
|
encoded = stripped.encode("utf-8")
|
||||||
new_size = len(encoded)
|
|
||||||
|
|
||||||
# Check existing transcript size to avoid overwriting newer with older
|
await storage.store(
|
||||||
path = _build_storage_path(user_id, session_id, storage)
|
workspace_id=wid,
|
||||||
content_skipped = False
|
file_id=fid,
|
||||||
try:
|
filename=fname,
|
||||||
existing = await storage.retrieve(path)
|
content=encoded,
|
||||||
if len(existing) >= new_size:
|
)
|
||||||
logger.info(
|
|
||||||
f"[Transcript] Skipping content upload — existing ({len(existing)}B) "
|
|
||||||
f">= new ({new_size}B) for session {session_id}"
|
|
||||||
)
|
|
||||||
content_skipped = True
|
|
||||||
except (FileNotFoundError, Exception):
|
|
||||||
pass # No existing transcript or retrieval error — proceed with upload
|
|
||||||
|
|
||||||
if not content_skipped:
|
# Update metadata so message_count stays current. The gap-fill logic
|
||||||
await storage.store(
|
# in _build_query_message relies on it to avoid re-compressing messages.
|
||||||
workspace_id=wid,
|
|
||||||
file_id=fid,
|
|
||||||
filename=fname,
|
|
||||||
content=encoded,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Always update metadata (even when content is skipped) so message_count
|
|
||||||
# stays current. The gap-fill logic in _build_query_message relies on
|
|
||||||
# message_count to avoid re-compressing the same messages every turn.
|
|
||||||
try:
|
try:
|
||||||
meta = {"message_count": message_count, "uploaded_at": time.time()}
|
meta = {"message_count": message_count, "uploaded_at": time.time()}
|
||||||
mwid, mfid, mfname = _meta_storage_path_parts(user_id, session_id)
|
mwid, mfid, mfname = _meta_storage_path_parts(user_id, session_id)
|
||||||
@@ -393,9 +376,8 @@ async def upload_transcript(
|
|||||||
logger.warning(f"[Transcript] Failed to write metadata for {session_id}: {e}")
|
logger.warning(f"[Transcript] Failed to write metadata for {session_id}: {e}")
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[Transcript] Uploaded {new_size}B "
|
f"[Transcript] Uploaded {len(encoded)}B "
|
||||||
f"(stripped from {len(content)}B, msg_count={message_count}, "
|
f"(stripped from {len(content)}B, msg_count={message_count}) "
|
||||||
f"content_skipped={content_skipped}) "
|
|
||||||
f"for session {session_id}"
|
f"for session {session_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user