From 9415166ee02f7b220ae92fb27b836d7a5a49fdf0 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Thu, 16 Apr 2026 06:58:04 +0700 Subject: [PATCH] fix(backend/copilot): split broad except in _read_cli_session_from_disk, clean up dataclass field comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split `except Exception` into `UnicodeDecodeError` (returns raw) + nested `OSError` on write-back (returns stripped for GCS despite local failure) + fallback `Exception` — eliminates path leak via OSError str() and clarifies each fallback path's intent - Move write-back into its own nested try/except so pyright can verify `stripped_bytes` is always bound before use - Simplify `TranscriptDownload.mode` default: use conventional comment on separate line instead of parenthesized expression - Add `TestReadCliSessionFromDisk` tests covering both new exception branches --- .../backend/backend/copilot/sdk/service.py | 29 +++++-- .../backend/copilot/sdk/transcript_test.py | 86 +++++++++++++++++++ .../backend/backend/copilot/transcript.py | 5 +- 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py index 01426367a9..6965c88738 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service.py @@ -939,8 +939,18 @@ def _read_cli_session_from_disk( raw_text = raw_bytes.decode("utf-8") stripped_text = strip_for_upload(raw_text) stripped_bytes = stripped_text.encode("utf-8") - if len(stripped_bytes) < len(raw_bytes): - # Write back locally so same-pod turns also benefit. + except UnicodeDecodeError: + logger.warning("%s CLI session is not valid UTF-8, uploading raw", log_prefix) + return raw_bytes + except Exception as e: + logger.warning( + "%s Failed to strip CLI session, uploading raw: %s", log_prefix, e + ) + return raw_bytes + + if len(stripped_bytes) < len(raw_bytes): + # Write back locally so same-pod turns also benefit. + try: Path(real_path).write_bytes(stripped_bytes) logger.info( "%s Stripped CLI session: %dB → %dB", @@ -948,12 +958,15 @@ def _read_cli_session_from_disk( len(raw_bytes), len(stripped_bytes), ) - return stripped_bytes - except Exception as e: - logger.warning( - "%s Failed to strip CLI session, uploading raw: %s", log_prefix, e - ) - return raw_bytes + except OSError as e: + # write_bytes failed — stripped content is still valid for GCS upload even + # though the local write-back failed (same-pod optimization silently skipped). + logger.warning( + "%s Failed to write back stripped CLI session: %s", + log_prefix, + e.strerror or str(e), + ) + return stripped_bytes def _process_cli_restore( diff --git a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py index 6588169dd3..f8e1608094 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py @@ -1451,3 +1451,89 @@ class TestProcessCliRestore: assert not ok assert stripped_str == "" + + +class TestReadCliSessionFromDisk: + """``_read_cli_session_from_disk`` reads, strips, and optionally writes back the session.""" + + def _build_session_file(self, tmp_path, session_id: str): + """Build the session file path inside tmp_path using the same encoding as cli_session_path.""" + import os + import re + from pathlib import Path + + sdk_cwd = str(tmp_path) + encoded_cwd = re.sub(r"[^a-zA-Z0-9]", "-", os.path.realpath(sdk_cwd)) + session_dir = Path(str(tmp_path)) / encoded_cwd + session_dir.mkdir(parents=True, exist_ok=True) + return sdk_cwd, session_dir / f"{session_id}.jsonl" + + def test_returns_raw_bytes_for_invalid_utf8(self, tmp_path): + """Non-UTF-8 bytes trigger UnicodeDecodeError — returns raw bytes (upload-raw fallback).""" + from unittest.mock import patch + + from backend.copilot.sdk.service import _read_cli_session_from_disk + + session_id = "12345678-0000-0000-0000-aabbccdd0001" + projects_base_dir = str(tmp_path) + sdk_cwd, session_file = self._build_session_file(tmp_path, session_id) + + # Write raw invalid UTF-8 bytes + session_file.write_bytes(b"\xff\xfe invalid utf-8\n") + + with ( + patch( + "backend.copilot.sdk.service.projects_base", + return_value=projects_base_dir, + ), + patch( + "backend.copilot.transcript.projects_base", + return_value=projects_base_dir, + ), + ): + result = _read_cli_session_from_disk(sdk_cwd, session_id, "[Test]") + + # UnicodeDecodeError path returns the raw bytes (upload-raw fallback) + assert result == b"\xff\xfe invalid utf-8\n" + + def test_write_back_oserror_still_returns_stripped_bytes(self, tmp_path): + """OSError on write-back returns stripped bytes for GCS upload (not raw).""" + from unittest.mock import patch + + from backend.copilot.sdk.service import _read_cli_session_from_disk + + session_id = "12345678-0000-0000-0000-aabbccdd0002" + projects_base_dir = str(tmp_path) + sdk_cwd, session_file = self._build_session_file(tmp_path, session_id) + + # Content with a strippable progress entry so stripped_bytes < raw_bytes + raw_content = ( + '{"type":"progress","uuid":"p1","subtype":"agent_progress","parentUuid":null}\n' + '{"type":"user","uuid":"u1","parentUuid":null,"message":{"role":"user","content":"hi"}}\n' + '{"type":"assistant","uuid":"a1","parentUuid":"u1","message":{"role":"assistant","content":[{"type":"text","text":"hello"}]}}\n' + ) + session_file.write_bytes(raw_content.encode("utf-8")) + # Make the file read-only so write_bytes raises OSError on the write-back + session_file.chmod(0o444) + + try: + with ( + patch( + "backend.copilot.sdk.service.projects_base", + return_value=projects_base_dir, + ), + patch( + "backend.copilot.transcript.projects_base", + return_value=projects_base_dir, + ), + ): + result = _read_cli_session_from_disk(sdk_cwd, session_id, "[Test]") + finally: + session_file.chmod(0o644) + + # Must return stripped bytes (not raw, not None) so GCS gets the clean version + assert result is not None + assert ( + b"progress" not in result + ), "Stripped bytes must not contain progress entry" + assert b"hello" in result, "Stripped bytes should contain assistant turn" diff --git a/autogpt_platform/backend/backend/copilot/transcript.py b/autogpt_platform/backend/backend/copilot/transcript.py index 370807058c..b5e2d60dde 100644 --- a/autogpt_platform/backend/backend/copilot/transcript.py +++ b/autogpt_platform/backend/backend/copilot/transcript.py @@ -55,9 +55,8 @@ TranscriptMode = Literal["sdk", "baseline"] class TranscriptDownload: content: bytes message_count: int = 0 - mode: TranscriptMode = ( - "sdk" # "sdk" = Claude CLI native, "baseline" = TranscriptBuilder - ) + # "sdk" = Claude CLI native, "baseline" = TranscriptBuilder + mode: TranscriptMode = "sdk" # Storage prefix for the CLI's native session JSONL files (for cross-pod --resume).