mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-13 08:14:58 -05:00
fix(chat/sdk): fix transcript validation and type captured_transcript properly
- Replace dict[str, str] with a CapturedTranscript dataclass for type safety
- Fix validate_transcript requiring >=3 lines — after stripping metadata, a valid 1-turn conversation is just user+assistant (2 lines)
- Apply CodeQL autofix: internalize max_len in _sanitize_id, add an "unknown" fallback for IDs that are empty after sanitizing
This commit is contained in:
@@ -6,6 +6,7 @@ import logging
|
||||
import os
|
||||
import uuid
|
||||
from collections.abc import AsyncGenerator
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from backend.util.exceptions import NotFoundError
|
||||
@@ -60,6 +61,18 @@ config = ChatConfig()
|
||||
_background_tasks: set[asyncio.Task[Any]] = set()
|
||||
|
||||
|
||||
@dataclass
|
||||
class CapturedTranscript:
|
||||
"""Info captured by the SDK Stop hook for stateless --resume."""
|
||||
|
||||
path: str = ""
|
||||
sdk_session_id: str = ""
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return bool(self.path)
|
||||
|
||||
|
||||
_SDK_CWD_PREFIX = WORKSPACE_PREFIX
|
||||
|
||||
# Appended to the system prompt to inform the agent about available tools.
|
||||
@@ -502,11 +515,11 @@ async def stream_chat_completion_sdk(
|
||||
sdk_model = _resolve_sdk_model()
|
||||
|
||||
# --- Transcript capture via Stop hook ---
|
||||
captured_transcript: dict[str, str] = {}
|
||||
captured_transcript = CapturedTranscript()
|
||||
|
||||
def _on_stop(transcript_path: str, sdk_session_id: str) -> None:
|
||||
captured_transcript["path"] = transcript_path
|
||||
captured_transcript["session_id"] = sdk_session_id
|
||||
captured_transcript.path = transcript_path
|
||||
captured_transcript.sdk_session_id = sdk_session_id
|
||||
|
||||
security_hooks = create_security_hooks(
|
||||
user_id,
|
||||
@@ -567,9 +580,9 @@ async def stream_chat_completion_sdk(
|
||||
yield StreamFinish()
|
||||
return
|
||||
|
||||
# Build query: with --resume the CLI already has full context,
|
||||
# so we only send the new message. Without resume, compress
|
||||
# history into a context prefix as before.
|
||||
# Build query: with --resume the CLI already has full
|
||||
# context, so we only send the new message. Without
|
||||
# resume, compress history into a context prefix.
|
||||
query_message = current_message
|
||||
if not use_resume and len(session.messages) > 1:
|
||||
logger.warning(
|
||||
@@ -675,13 +688,12 @@ async def stream_chat_completion_sdk(
|
||||
if (
|
||||
config.claude_agent_use_resume
|
||||
and user_id
|
||||
and captured_transcript.get("path")
|
||||
and captured_transcript.available
|
||||
):
|
||||
# Give CLI time to flush JSONL writes before we read
|
||||
await asyncio.sleep(0.5)
|
||||
raw_transcript = read_transcript_file(captured_transcript["path"])
|
||||
raw_transcript = read_transcript_file(captured_transcript.path)
|
||||
if raw_transcript:
|
||||
# Upload in background — strip + store to bucket
|
||||
task = asyncio.create_task(
|
||||
_upload_transcript_bg(user_id, session_id, raw_transcript)
|
||||
)
|
||||
|
||||
@@ -145,13 +145,15 @@ def read_transcript_file(transcript_path: str) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _sanitize_id(raw_id: str) -> str:
|
||||
def _sanitize_id(raw_id: str, max_len: int = 36) -> str:
|
||||
"""Sanitize an ID for safe use in file paths.
|
||||
|
||||
Session/user IDs are UUIDs (hex + hyphens). Strip everything else
|
||||
to prevent path traversal or injection via crafted IDs.
|
||||
Session/user IDs are expected to be UUIDs (hex + hyphens). Strip
|
||||
everything else and truncate to *max_len* so the result cannot introduce
|
||||
path separators or other special characters.
|
||||
"""
|
||||
return _SAFE_ID_RE.sub("", raw_id)
|
||||
cleaned = _SAFE_ID_RE.sub("", raw_id or "")[:max_len]
|
||||
return cleaned or "unknown"
|
||||
|
||||
|
||||
_SAFE_CWD_PREFIX = os.path.realpath("/tmp/copilot-")
|
||||
@@ -177,7 +179,7 @@ def write_transcript_to_tempfile(
|
||||
|
||||
try:
|
||||
os.makedirs(real_cwd, exist_ok=True)
|
||||
safe_id = _sanitize_id(session_id)[:8]
|
||||
safe_id = _sanitize_id(session_id, max_len=8)
|
||||
jsonl_path = os.path.join(real_cwd, f"transcript-{safe_id}.jsonl")
|
||||
|
||||
with open(jsonl_path, "w") as f:
|
||||
@@ -202,7 +204,7 @@ def validate_transcript(content: str | None) -> bool:
|
||||
return False
|
||||
|
||||
lines = content.strip().split("\n")
|
||||
if len(lines) < 3:
|
||||
if len(lines) < 2:
|
||||
return False
|
||||
|
||||
has_user = False
|
||||
|
||||
Reference in New Issue
Block a user