fix(backend/copilot): prevent transcript upload task garbage collection

HIGH severity fix: when upload_transcript exceeds the 30s timeout, the shielded
upload keeps running, but nothing holds a strong reference to its task any more.
Because the event loop keeps only weak references to tasks, Python's GC can
reclaim the task before it completes, causing silent data loss.

Fix: if TimeoutError occurs, explicitly create a task for the upload and add it
to _background_tasks so that a strong reference is maintained. The upload then
completes in the background without blocking the release of the session lock.

Addresses PR discussion r2895491552
This commit is contained in:
Zamil Majdy
2026-03-06 19:23:54 +07:00
parent 0eddb6f1bb
commit b8c65e3d2b

View File

@@ -1554,8 +1554,26 @@ async def stream_chat_completion_sdk(
transcript_builder.entry_count,
len(transcript_content),
)
async with asyncio.timeout(30):
await asyncio.shield(
try:
async with asyncio.timeout(30):
await asyncio.shield(
upload_transcript(
user_id=user_id,
session_id=session_id,
content=transcript_content,
message_count=len(session.messages),
log_prefix=log_prefix,
)
)
except TimeoutError:
# Timeout fired but shield keeps upload running - track it
# to prevent garbage collection (maintain strong reference)
logger.warning(
"%s Transcript upload exceeded 30s timeout, "
"continuing in background",
log_prefix,
)
task = asyncio.create_task(
upload_transcript(
user_id=user_id,
session_id=session_id,
@@ -1564,6 +1582,8 @@ async def stream_chat_completion_sdk(
log_prefix=log_prefix,
)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
except Exception as upload_err:
logger.error(
"%s Transcript upload failed in finally: %s",