mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
fix(backend/copilot): split broad except in _read_cli_session_from_disk, clean up dataclass field comment
- Split `except Exception` into `UnicodeDecodeError` (returns raw) + nested `OSError` on write-back (returns stripped for GCS despite local failure) + fallback `Exception` — eliminates path leak via OSError str() and clarifies each fallback path's intent - Move write-back into its own nested try/except so pyright can verify `stripped_bytes` is always bound before use - Simplify `TranscriptDownload.mode` default: use conventional comment on separate line instead of parenthesized expression - Add `TestReadCliSessionFromDisk` tests covering both new exception branches
This commit is contained in:
@@ -939,8 +939,18 @@ def _read_cli_session_from_disk(
|
||||
raw_text = raw_bytes.decode("utf-8")
|
||||
stripped_text = strip_for_upload(raw_text)
|
||||
stripped_bytes = stripped_text.encode("utf-8")
|
||||
if len(stripped_bytes) < len(raw_bytes):
|
||||
# Write back locally so same-pod turns also benefit.
|
||||
except UnicodeDecodeError:
|
||||
logger.warning("%s CLI session is not valid UTF-8, uploading raw", log_prefix)
|
||||
return raw_bytes
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"%s Failed to strip CLI session, uploading raw: %s", log_prefix, e
|
||||
)
|
||||
return raw_bytes
|
||||
|
||||
if len(stripped_bytes) < len(raw_bytes):
|
||||
# Write back locally so same-pod turns also benefit.
|
||||
try:
|
||||
Path(real_path).write_bytes(stripped_bytes)
|
||||
logger.info(
|
||||
"%s Stripped CLI session: %dB → %dB",
|
||||
@@ -948,12 +958,15 @@ def _read_cli_session_from_disk(
|
||||
len(raw_bytes),
|
||||
len(stripped_bytes),
|
||||
)
|
||||
return stripped_bytes
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"%s Failed to strip CLI session, uploading raw: %s", log_prefix, e
|
||||
)
|
||||
return raw_bytes
|
||||
except OSError as e:
|
||||
# write_bytes failed — stripped content is still valid for GCS upload even
|
||||
# though the local write-back failed (same-pod optimization silently skipped).
|
||||
logger.warning(
|
||||
"%s Failed to write back stripped CLI session: %s",
|
||||
log_prefix,
|
||||
e.strerror or str(e),
|
||||
)
|
||||
return stripped_bytes
|
||||
|
||||
|
||||
def _process_cli_restore(
|
||||
|
||||
@@ -1451,3 +1451,89 @@ class TestProcessCliRestore:
|
||||
|
||||
assert not ok
|
||||
assert stripped_str == ""
|
||||
|
||||
|
||||
class TestReadCliSessionFromDisk:
|
||||
"""``_read_cli_session_from_disk`` reads, strips, and optionally writes back the session."""
|
||||
|
||||
def _build_session_file(self, tmp_path, session_id: str):
|
||||
"""Build the session file path inside tmp_path using the same encoding as cli_session_path."""
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
sdk_cwd = str(tmp_path)
|
||||
encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd))
|
||||
session_dir = Path(str(tmp_path)) / encoded_cwd
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
return sdk_cwd, session_dir / f"{session_id}.jsonl"
|
||||
|
||||
def test_returns_raw_bytes_for_invalid_utf8(self, tmp_path):
|
||||
"""Non-UTF-8 bytes trigger UnicodeDecodeError — returns raw bytes (upload-raw fallback)."""
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.copilot.sdk.service import _read_cli_session_from_disk
|
||||
|
||||
session_id = "12345678-0000-0000-0000-aabbccdd0001"
|
||||
projects_base_dir = str(tmp_path)
|
||||
sdk_cwd, session_file = self._build_session_file(tmp_path, session_id)
|
||||
|
||||
# Write raw invalid UTF-8 bytes
|
||||
session_file.write_bytes(b"\xff\xfe invalid utf-8\n")
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.sdk.service.projects_base",
|
||||
return_value=projects_base_dir,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.projects_base",
|
||||
return_value=projects_base_dir,
|
||||
),
|
||||
):
|
||||
result = _read_cli_session_from_disk(sdk_cwd, session_id, "[Test]")
|
||||
|
||||
# UnicodeDecodeError path returns the raw bytes (upload-raw fallback)
|
||||
assert result == b"\xff\xfe invalid utf-8\n"
|
||||
|
||||
def test_write_back_oserror_still_returns_stripped_bytes(self, tmp_path):
|
||||
"""OSError on write-back returns stripped bytes for GCS upload (not raw)."""
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.copilot.sdk.service import _read_cli_session_from_disk
|
||||
|
||||
session_id = "12345678-0000-0000-0000-aabbccdd0002"
|
||||
projects_base_dir = str(tmp_path)
|
||||
sdk_cwd, session_file = self._build_session_file(tmp_path, session_id)
|
||||
|
||||
# Content with a strippable progress entry so stripped_bytes < raw_bytes
|
||||
raw_content = (
|
||||
'{"type":"progress","uuid":"p1","subtype":"agent_progress","parentUuid":null}\n'
|
||||
'{"type":"user","uuid":"u1","parentUuid":null,"message":{"role":"user","content":"hi"}}\n'
|
||||
'{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}\n'
|
||||
)
|
||||
session_file.write_bytes(raw_content.encode("utf-8"))
|
||||
# Make the file read-only so write_bytes raises OSError on the write-back
|
||||
session_file.chmod(0o444)
|
||||
|
||||
try:
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.sdk.service.projects_base",
|
||||
return_value=projects_base_dir,
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.transcript.projects_base",
|
||||
return_value=projects_base_dir,
|
||||
),
|
||||
):
|
||||
result = _read_cli_session_from_disk(sdk_cwd, session_id, "[Test]")
|
||||
finally:
|
||||
session_file.chmod(0o644)
|
||||
|
||||
# Must return stripped bytes (not raw, not None) so GCS gets the clean version
|
||||
assert result is not None
|
||||
assert (
|
||||
b"progress" not in result
|
||||
), "Stripped bytes must not contain progress entry"
|
||||
assert b"hello" in result, "Stripped bytes should contain assistant turn"
|
||||
|
||||
@@ -55,9 +55,8 @@ TranscriptMode = Literal["sdk", "baseline"]
|
||||
class TranscriptDownload:
|
||||
content: bytes
|
||||
message_count: int = 0
|
||||
mode: TranscriptMode = (
|
||||
"sdk" # "sdk" = Claude CLI native, "baseline" = TranscriptBuilder
|
||||
)
|
||||
# "sdk" = Claude CLI native, "baseline" = TranscriptBuilder
|
||||
mode: TranscriptMode = "sdk"
|
||||
|
||||
|
||||
# Storage prefix for the CLI's native session JSONL files (for cross-pod --resume).
|
||||
|
||||
Reference in New Issue
Block a user