fix(backend/copilot): scope stale project-dir sweep to current session and expose encode_cwd_for_cli

Addresses a multi-tenant safety concern: cleanup_stale_project_dirs now accepts
an optional encoded_cwd parameter that limits the sweep to just the current
session's directory instead of all ~/.claude/projects/ entries. Exposes
encode_cwd_for_cli as a public function from context.py and passes the encoded
cwd from _cleanup_sdk_tool_results. Adds three new tests covering scoped
sweep behaviour.
This commit is contained in:
Zamil Majdy
2026-03-15 22:44:48 +07:00
parent c854c1a485
commit 4b6c2a1323
4 changed files with 135 additions and 8 deletions

View File

@@ -39,11 +39,20 @@ _current_sandbox: ContextVar["AsyncSandbox | None"] = ContextVar(
_current_sdk_cwd: ContextVar[str] = ContextVar("_current_sdk_cwd", default="")
def _encode_cwd_for_cli(cwd: str) -> str:
"""Encode a working directory path the same way the Claude CLI does."""
def encode_cwd_for_cli(cwd: str) -> str:
    """Return the directory name the Claude CLI derives from *cwd*.

    The path is first resolved with ``os.path.realpath``; every character of
    the resolved path that is not an ASCII letter or digit is then replaced
    with ``-``.  For example ``/tmp/copilot-abc`` becomes
    ``-tmp-copilot-abc`` (provided ``/tmp`` does not resolve elsewhere).
    """
    resolved_path = os.path.realpath(cwd)
    return re.sub(r"[^a-zA-Z0-9]", "-", resolved_path)
# Keep the underscore-prefixed name bound to the same function object so that
# internal callers still using the old private name keep working
# (backwards compat).
_encode_cwd_for_cli = encode_cwd_for_cli
def set_execution_context(
user_id: str | None,
session: ChatSession,

View File

@@ -38,6 +38,7 @@ from backend.util.settings import Settings
from ..config import ChatConfig
from ..constants import COPILOT_ERROR_PREFIX, COPILOT_SYSTEM_PREFIX
from ..context import encode_cwd_for_cli
from ..model import (
ChatMessage,
ChatSession,
@@ -306,11 +307,14 @@ async def _cleanup_sdk_tool_results(cwd: str) -> None:
await asyncio.to_thread(shutil.rmtree, normalized, True)
# Best-effort sweep of old project dirs to prevent disk leak.
# Pass the encoded cwd so only this session's project directory is swept,
# which is safe in multi-tenant environments.
global _last_sweep_time
now = time.time()
if now - _last_sweep_time >= _SWEEP_INTERVAL_SECONDS:
_last_sweep_time = now
await asyncio.to_thread(cleanup_stale_project_dirs)
encoded = encode_cwd_for_cli(normalized)
await asyncio.to_thread(cleanup_stale_project_dirs, encoded)
def _format_sdk_content_blocks(blocks: list) -> list[dict[str, Any]]:

View File

@@ -155,7 +155,7 @@ _STALE_PROJECT_DIR_SECONDS = 12 * 3600 # 12 hours — matches max session lifet
_MAX_PROJECT_DIRS_TO_SWEEP = 50 # limit per sweep to avoid long pauses
def cleanup_stale_project_dirs() -> int:
def cleanup_stale_project_dirs(encoded_cwd: str | None = None) -> int:
"""Remove CLI project directories older than ``_STALE_PROJECT_DIR_SECONDS``.
Each CoPilot SDK turn creates a unique ``~/.claude/projects/<encoded-cwd>/``
@@ -164,10 +164,13 @@ def cleanup_stale_project_dirs() -> int:
become stale. This function sweeps old ones to prevent unbounded disk
growth.
Only directories matching the copilot naming pattern (``-tmp-copilot-``)
are considered. The 12 h TTL ensures active sessions are never affected,
even in multi-tenant deployments where multiple copilot sessions share
the same host. Returns the number of directories removed.
When *encoded_cwd* is provided the sweep is scoped to that single
directory, making the operation safe in multi-tenant environments where
multiple copilot sessions share the same host. Without it the function
falls back to sweeping all directories matching the copilot naming pattern
(``-tmp-copilot-``), which is only safe for single-tenant deployments.
Returns the number of directories removed.
"""
projects_base = _projects_base()
if not os.path.isdir(projects_base):
@@ -176,6 +179,40 @@ def cleanup_stale_project_dirs() -> int:
now = time.time()
removed = 0
# Scoped mode: only clean up the one directory for the current session.
if encoded_cwd:
target = Path(projects_base) / encoded_cwd
if not target.is_dir():
return 0
# Guard: only sweep copilot-generated dirs.
if "-tmp-copilot-" not in target.name:
logger.warning(
"[Transcript] Refusing to sweep non-copilot dir: %s", target.name
)
return 0
try:
age = now - target.stat().st_mtime
except OSError:
return 0
if age < _STALE_PROJECT_DIR_SECONDS:
return 0
try:
shutil.rmtree(target, ignore_errors=True)
removed = 1
except OSError:
pass
if removed:
logger.info(
"[Transcript] Swept stale CLI project dir %s (age %ds > %ds)",
target.name,
int(age),
_STALE_PROJECT_DIR_SECONDS,
)
return removed
# Unscoped fallback: sweep all copilot dirs across the projects base.
# Only safe for single-tenant deployments; callers should prefer the
# scoped variant by passing encoded_cwd.
try:
entries = Path(projects_base).iterdir()
except OSError as e:

View File

@@ -945,3 +945,80 @@ class TestCleanupStaleProjectDirs:
removed = cleanup_stale_project_dirs()
assert removed == 0
def test_scoped_removes_only_target_dir(self, tmp_path, monkeypatch):
    """A scoped sweep deletes the named stale directory and nothing else."""
    import time

    from backend.copilot.sdk.transcript import (
        _STALE_PROJECT_DIR_SECONDS,
        cleanup_stale_project_dirs,
    )

    base = tmp_path / "projects"
    base.mkdir()
    # Point the sweep at the temporary tree rather than the real projects base.
    monkeypatch.setattr(
        "backend.copilot.sdk.transcript._projects_base",
        lambda: str(base),
    )

    # Back-date both directories past the staleness threshold so that the
    # scoping, not the TTL, decides which one survives.
    stale_stamp = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
    swept = base / "-tmp-copilot-session-abc"
    bystander = base / "-tmp-copilot-session-xyz"
    for directory in (swept, bystander):
        directory.mkdir()
        os.utime(directory, (stale_stamp, stale_stamp))

    count = cleanup_stale_project_dirs(encoded_cwd="-tmp-copilot-session-abc")

    assert count == 1
    assert not swept.exists()
    assert bystander.exists()  # untouched — not the current session
def test_scoped_fresh_dir_not_removed(self, tmp_path, monkeypatch):
    """A directory younger than the TTL survives a scoped sweep."""
    from backend.copilot.sdk.transcript import cleanup_stale_project_dirs

    base = tmp_path / "projects"
    base.mkdir()
    # Point the sweep at the temporary tree rather than the real projects base.
    monkeypatch.setattr(
        "backend.copilot.sdk.transcript._projects_base",
        lambda: str(base),
    )

    recent = base / "-tmp-copilot-session-new"
    recent.mkdir()  # mtime is "now", i.e. well inside the TTL

    count = cleanup_stale_project_dirs(encoded_cwd="-tmp-copilot-session-new")

    assert count == 0
    assert recent.exists()
def test_scoped_non_copilot_dir_not_removed(self, tmp_path, monkeypatch):
    """A scoped sweep leaves a directory without the copilot marker in place."""
    import time

    from backend.copilot.sdk.transcript import (
        _STALE_PROJECT_DIR_SECONDS,
        cleanup_stale_project_dirs,
    )

    base = tmp_path / "projects"
    base.mkdir()
    # Point the sweep at the temporary tree rather than the real projects base.
    monkeypatch.setattr(
        "backend.copilot.sdk.transcript._projects_base",
        lambda: str(base),
    )

    # Make the directory stale so that only the name guard can protect it.
    stale_stamp = time.time() - _STALE_PROJECT_DIR_SECONDS - 100
    foreign = base / "some-other-project"
    foreign.mkdir()
    os.utime(foreign, (stale_stamp, stale_stamp))

    count = cleanup_stale_project_dirs(encoded_cwd="some-other-project")

    assert count == 0
    assert foreign.exists()