mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-24 03:00:28 -05:00
Compare commits
9 Commits
feat/copil
...
fix/claude
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7bc08672fa | ||
|
|
be2a48aedb | ||
|
|
aeca4dbb79 | ||
|
|
e8b8cad97a | ||
|
|
be35c626ad | ||
|
|
719c4ee1d1 | ||
|
|
411c399e03 | ||
|
|
6ac011e36c | ||
|
|
5e554526e2 |
@@ -50,6 +50,7 @@ from backend.copilot.tools.models import (
|
||||
OperationPendingResponse,
|
||||
OperationStartedResponse,
|
||||
SetupRequirementsResponse,
|
||||
SuggestedGoalResponse,
|
||||
UnderstandingUpdatedResponse,
|
||||
)
|
||||
from backend.copilot.tracking import track_user_message
|
||||
@@ -984,6 +985,7 @@ ToolResponseUnion = (
|
||||
| AgentPreviewResponse
|
||||
| AgentSavedResponse
|
||||
| ClarificationNeededResponse
|
||||
| SuggestedGoalResponse
|
||||
| BlockListResponse
|
||||
| BlockDetailsResponse
|
||||
| BlockOutputResponse
|
||||
|
||||
@@ -187,9 +187,11 @@ class ClaudeCodeBlock(Block):
|
||||
)
|
||||
files: list[SandboxFileOutput] = SchemaField(
|
||||
description=(
|
||||
"List of text files created/modified by Claude Code during this execution. "
|
||||
"List of files created/modified by Claude Code during this execution. "
|
||||
"Includes text files and binary files (images, PDFs, etc.). "
|
||||
"Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
|
||||
"workspace_ref contains a workspace:// URI if the file was stored to workspace."
|
||||
"workspace_ref contains a workspace:// URI for workspace storage. "
|
||||
"For binary files, content contains a placeholder; use workspace_ref to access the file."
|
||||
)
|
||||
)
|
||||
conversation_history: str = SchemaField(
|
||||
@@ -452,13 +454,15 @@ class ClaudeCodeBlock(Block):
|
||||
else:
|
||||
new_conversation_history = turn_entry
|
||||
|
||||
# Extract files created/modified during this run and store to workspace
|
||||
# Extract files created/modified during this run and store to workspace.
|
||||
# Binary files (images, PDFs, etc.) are stored via store_media_file
|
||||
# which handles virus scanning and workspace storage.
|
||||
sandbox_files = await extract_and_store_sandbox_files(
|
||||
sandbox=sandbox,
|
||||
working_directory=working_directory,
|
||||
execution_context=execution_context,
|
||||
since_timestamp=start_timestamp,
|
||||
text_only=True,
|
||||
text_only=False,
|
||||
)
|
||||
|
||||
return (
|
||||
|
||||
@@ -16,7 +16,6 @@ from .tool_adapter import (
|
||||
DANGEROUS_PATTERNS,
|
||||
MCP_TOOL_PREFIX,
|
||||
WORKSPACE_SCOPED_TOOLS,
|
||||
get_sandbox_manager,
|
||||
stash_pending_tool_output,
|
||||
)
|
||||
|
||||
@@ -98,10 +97,8 @@ def _validate_tool_access(
|
||||
"Use the CoPilot-specific MCP tools instead."
|
||||
)
|
||||
|
||||
# Workspace-scoped tools: allowed only within the SDK workspace directory.
|
||||
# When e2b is enabled, these SDK built-in tools are disabled (replaced by
|
||||
# MCP e2b file tools), so skip workspace path validation.
|
||||
if tool_name in WORKSPACE_SCOPED_TOOLS and get_sandbox_manager() is None:
|
||||
# Workspace-scoped tools: allowed only within the SDK workspace directory
|
||||
if tool_name in WORKSPACE_SCOPED_TOOLS:
|
||||
return _validate_workspace_path(tool_name, tool_input, sdk_cwd)
|
||||
|
||||
# Check for dangerous patterns in tool input
|
||||
|
||||
@@ -58,9 +58,6 @@ from .transcript import (
|
||||
logger = logging.getLogger(__name__)
|
||||
config = ChatConfig()
|
||||
|
||||
# SDK built-in file tools to disable when e2b is active (replaced by MCP tools)
|
||||
_E2B_DISALLOWED_SDK_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep"]
|
||||
|
||||
# Set to hold background tasks to prevent garbage collection
|
||||
_background_tasks: set[asyncio.Task[Any]] = set()
|
||||
|
||||
@@ -101,23 +98,6 @@ _SDK_TOOL_SUPPLEMENT = """
|
||||
is delivered to the user via a background stream.
|
||||
"""
|
||||
|
||||
_SDK_TOOL_SUPPLEMENT_E2B = """
|
||||
|
||||
## Tool notes
|
||||
|
||||
- The SDK built-in Bash, Read, Write, Edit, Glob, and Grep tools are NOT available.
|
||||
Use the MCP tools instead: `bash_exec`, `read_file`, `write_file`, `edit_file`,
|
||||
`glob_files`, `grep_files`.
|
||||
- **All tools share a single sandbox**: The sandbox is a microVM with a shared
|
||||
filesystem at /home/user/. Files created by any tool are accessible to all others.
|
||||
Network access IS available (pip install, curl, etc.).
|
||||
- **Persistent storage**: Use `save_to_workspace` to persist sandbox files to cloud
|
||||
storage, and `load_from_workspace` to bring workspace files into the sandbox.
|
||||
- Long-running tools (create_agent, edit_agent, etc.) are handled
|
||||
asynchronously. You will receive an immediate response; the actual result
|
||||
is delivered to the user via a background stream.
|
||||
"""
|
||||
|
||||
|
||||
def _build_long_running_callback(user_id: str | None) -> LongRunningCallback:
|
||||
"""Build a callback that delegates long-running tools to the non-SDK infrastructure.
|
||||
@@ -473,33 +453,12 @@ async def stream_chat_completion_sdk(
|
||||
_background_tasks.add(task)
|
||||
task.add_done_callback(_background_tasks.discard)
|
||||
|
||||
# Check if e2b sandbox is enabled for this user
|
||||
sandbox_mgr = None
|
||||
use_e2b = False
|
||||
try:
|
||||
from backend.util.feature_flag import Flag
|
||||
from backend.util.feature_flag import is_feature_enabled as _is_flag_enabled
|
||||
from backend.util.settings import Config as AppConfig
|
||||
|
||||
app_config = AppConfig()
|
||||
use_e2b = await _is_flag_enabled(
|
||||
Flag.COPILOT_E2B,
|
||||
user_id or "anonymous",
|
||||
default=app_config.copilot_use_e2b,
|
||||
)
|
||||
if use_e2b:
|
||||
from backend.copilot.tools.e2b_sandbox import CoPilotSandboxManager
|
||||
|
||||
sandbox_mgr = CoPilotSandboxManager()
|
||||
except Exception as e:
|
||||
logger.warning(f"[SDK] Failed to initialize e2b sandbox: {e}")
|
||||
|
||||
# Build system prompt (reuses non-SDK path with Langfuse support)
|
||||
has_history = len(session.messages) > 1
|
||||
system_prompt, _ = await _build_system_prompt(
|
||||
user_id, has_conversation_history=has_history
|
||||
)
|
||||
system_prompt += _SDK_TOOL_SUPPLEMENT_E2B if use_e2b else _SDK_TOOL_SUPPLEMENT
|
||||
system_prompt += _SDK_TOOL_SUPPLEMENT
|
||||
message_id = str(uuid.uuid4())
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
@@ -521,7 +480,6 @@ async def stream_chat_completion_sdk(
|
||||
user_id,
|
||||
session,
|
||||
long_running_callback=_build_long_running_callback(user_id),
|
||||
sandbox_manager=sandbox_mgr,
|
||||
)
|
||||
try:
|
||||
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||
@@ -573,21 +531,11 @@ async def stream_chat_completion_sdk(
|
||||
f"msg_count={transcript_msg_count})"
|
||||
)
|
||||
|
||||
# When e2b is active, disable SDK built-in file tools
|
||||
# (replaced by MCP e2b tools) and remove them from allowed list
|
||||
effective_disallowed = list(SDK_DISALLOWED_TOOLS)
|
||||
effective_allowed = list(COPILOT_TOOL_NAMES)
|
||||
if use_e2b:
|
||||
effective_disallowed.extend(_E2B_DISALLOWED_SDK_TOOLS)
|
||||
effective_allowed = [
|
||||
t for t in effective_allowed if t not in _E2B_DISALLOWED_SDK_TOOLS
|
||||
]
|
||||
|
||||
sdk_options_kwargs: dict[str, Any] = {
|
||||
"system_prompt": system_prompt,
|
||||
"mcp_servers": {"copilot": mcp_server},
|
||||
"allowed_tools": effective_allowed,
|
||||
"disallowed_tools": effective_disallowed,
|
||||
"allowed_tools": COPILOT_TOOL_NAMES,
|
||||
"disallowed_tools": SDK_DISALLOWED_TOOLS,
|
||||
"hooks": security_hooks,
|
||||
"cwd": sdk_cwd,
|
||||
"max_buffer_size": config.claude_agent_max_buffer_size,
|
||||
@@ -801,11 +749,6 @@ async def stream_chat_completion_sdk(
|
||||
)
|
||||
yield StreamFinish()
|
||||
finally:
|
||||
if sandbox_mgr:
|
||||
try:
|
||||
await sandbox_mgr.dispose_all()
|
||||
except Exception as e:
|
||||
logger.warning(f"[SDK] Failed to dispose e2b sandboxes: {e}")
|
||||
if sdk_cwd:
|
||||
_cleanup_sdk_tool_results(sdk_cwd)
|
||||
|
||||
|
||||
@@ -42,8 +42,7 @@ _current_session: ContextVar[ChatSession | None] = ContextVar(
|
||||
# Keyed by tool_name → full output string. Consumed (popped) by the
|
||||
# response adapter when it builds StreamToolOutputAvailable.
|
||||
_pending_tool_outputs: ContextVar[dict[str, list[str]]] = ContextVar(
|
||||
"pending_tool_outputs",
|
||||
default=None, # type: ignore[arg-type]
|
||||
"pending_tool_outputs", default=None # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
# Callback type for delegating long-running tools to the non-SDK infrastructure.
|
||||
@@ -57,15 +56,11 @@ _long_running_callback: ContextVar[LongRunningCallback | None] = ContextVar(
|
||||
"long_running_callback", default=None
|
||||
)
|
||||
|
||||
# ContextVar for the e2b sandbox manager (set when e2b is enabled).
|
||||
_sandbox_manager: ContextVar[Any | None] = ContextVar("sandbox_manager", default=None)
|
||||
|
||||
|
||||
def set_execution_context(
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
long_running_callback: LongRunningCallback | None = None,
|
||||
sandbox_manager: Any | None = None,
|
||||
) -> None:
|
||||
"""Set the execution context for tool calls.
|
||||
|
||||
@@ -77,13 +72,11 @@ def set_execution_context(
|
||||
session: Current chat session.
|
||||
long_running_callback: Optional callback to delegate long-running tools
|
||||
to the non-SDK background infrastructure (stream_registry + Redis).
|
||||
sandbox_manager: Optional CoPilotSandboxManager for e2b sandbox access.
|
||||
"""
|
||||
_current_user_id.set(user_id)
|
||||
_current_session.set(session)
|
||||
_pending_tool_outputs.set({})
|
||||
_long_running_callback.set(long_running_callback)
|
||||
_sandbox_manager.set(sandbox_manager)
|
||||
|
||||
|
||||
def get_execution_context() -> tuple[str | None, ChatSession | None]:
|
||||
@@ -94,11 +87,6 @@ def get_execution_context() -> tuple[str | None, ChatSession | None]:
|
||||
)
|
||||
|
||||
|
||||
def get_sandbox_manager() -> Any | None:
|
||||
"""Get the current e2b sandbox manager from execution context."""
|
||||
return _sandbox_manager.get(None)
|
||||
|
||||
|
||||
def pop_pending_tool_output(tool_name: str) -> str | None:
|
||||
"""Pop and return the oldest stashed output for *tool_name*.
|
||||
|
||||
|
||||
@@ -118,6 +118,8 @@ Adapt flexibly to the conversation context. Not every interaction requires all s
|
||||
- Find reusable components with `find_block`
|
||||
- Create custom solutions with `create_agent` if nothing suitable exists
|
||||
- Modify existing library agents with `edit_agent`
|
||||
- **When `create_agent` returns `suggested_goal`**: Present the suggestion to the user and ask "Would you like me to proceed with this refined goal?" If they accept, call `create_agent` again with the suggested goal.
|
||||
- **When `create_agent` returns `clarifying_questions`**: After the user answers, call `create_agent` again with the original description AND the answers in the `context` parameter.
|
||||
|
||||
5. **Execute**: Run automations immediately, schedule them, or set up webhooks using `run_agent`. Test specific components with `run_block`.
|
||||
|
||||
@@ -164,6 +166,11 @@ Adapt flexibly to the conversation context. Not every interaction requires all s
|
||||
- Use `add_understanding` to capture valuable business context
|
||||
- When tool calls fail, try alternative approaches
|
||||
|
||||
**Handle Feedback Loops:**
|
||||
- When a tool returns a suggested alternative (like a refined goal), present it clearly and ask the user for confirmation before proceeding
|
||||
- When clarifying questions are answered, immediately re-call the tool with the accumulated context
|
||||
- Don't ask redundant questions if the user has already provided context in the conversation
|
||||
|
||||
## CRITICAL REMINDER
|
||||
|
||||
You are NOT a chatbot. You are NOT documentation. You are a partner who helps busy business owners get value quickly by showing proof through working automations. Bias toward action over explanation."""
|
||||
|
||||
@@ -13,15 +13,6 @@ from .bash_exec import BashExecTool
|
||||
from .check_operation_status import CheckOperationStatusTool
|
||||
from .create_agent import CreateAgentTool
|
||||
from .customize_agent import CustomizeAgentTool
|
||||
from .e2b_file_tools import (
|
||||
E2BEditTool,
|
||||
E2BGlobTool,
|
||||
E2BGrepTool,
|
||||
E2BReadTool,
|
||||
E2BWriteTool,
|
||||
LoadFromWorkspaceTool,
|
||||
SaveToWorkspaceTool,
|
||||
)
|
||||
from .edit_agent import EditAgentTool
|
||||
from .feature_requests import CreateFeatureRequestTool, SearchFeatureRequestsTool
|
||||
from .find_agent import FindAgentTool
|
||||
@@ -72,14 +63,6 @@ TOOL_REGISTRY: dict[str, BaseTool] = {
|
||||
"read_workspace_file": ReadWorkspaceFileTool(),
|
||||
"write_workspace_file": WriteWorkspaceFileTool(),
|
||||
"delete_workspace_file": DeleteWorkspaceFileTool(),
|
||||
# E2B sandbox file tools (active when COPILOT_E2B feature flag is enabled)
|
||||
"read_file": E2BReadTool(),
|
||||
"write_file": E2BWriteTool(),
|
||||
"edit_file": E2BEditTool(),
|
||||
"glob_files": E2BGlobTool(),
|
||||
"grep_files": E2BGrepTool(),
|
||||
"save_to_workspace": SaveToWorkspaceTool(),
|
||||
"load_from_workspace": LoadFromWorkspaceTool(),
|
||||
}
|
||||
|
||||
# Export individual tool instances for backwards compatibility
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
"""Bash execution tool — run shell commands in a sandbox.
|
||||
|
||||
Supports two backends:
|
||||
- **e2b** (preferred): VM-level isolation with network access, enabled via
|
||||
the COPILOT_E2B feature flag.
|
||||
- **bubblewrap** (fallback): kernel-level isolation, no network, Linux-only.
|
||||
"""Bash execution tool — run shell commands in a bubblewrap sandbox.
|
||||
|
||||
Full Bash scripting is allowed (loops, conditionals, pipes, functions, etc.).
|
||||
Safety comes from OS-level isolation (bubblewrap): only system dirs visible
|
||||
read-only, writable workspace only, clean env, no network.
|
||||
|
||||
Requires bubblewrap (``bwrap``) — the tool is disabled when bwrap is not
|
||||
available (e.g. macOS development).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import shlex
|
||||
from typing import Any
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
@@ -20,8 +19,6 @@ from .sandbox import get_workspace_dir, has_full_sandbox, run_sandboxed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SANDBOX_HOME = "/home/user"
|
||||
|
||||
|
||||
class BashExecTool(BaseTool):
|
||||
"""Execute Bash commands in a bubblewrap sandbox."""
|
||||
@@ -32,18 +29,6 @@ class BashExecTool(BaseTool):
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
if _is_e2b_available():
|
||||
return (
|
||||
"Execute a Bash command or script in an e2b sandbox (microVM). "
|
||||
"Full Bash scripting is supported (loops, conditionals, pipes, "
|
||||
"functions, etc.). "
|
||||
"The sandbox shares the same filesystem as the read_file/write_file "
|
||||
"tools — files created by any tool are accessible to all others. "
|
||||
"Network access IS available (pip install, curl, etc.). "
|
||||
"Working directory is /home/user/. "
|
||||
"Execution is killed after the timeout (default 30s, max 120s). "
|
||||
"Returns stdout and stderr."
|
||||
)
|
||||
if not has_full_sandbox():
|
||||
return (
|
||||
"Bash execution is DISABLED — bubblewrap sandbox is not "
|
||||
@@ -100,6 +85,13 @@ class BashExecTool(BaseTool):
|
||||
) -> ToolResponseBase:
|
||||
session_id = session.session_id if session else None
|
||||
|
||||
if not has_full_sandbox():
|
||||
return ErrorResponse(
|
||||
message="bash_exec requires bubblewrap sandbox (Linux only).",
|
||||
error="sandbox_unavailable",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
command: str = (kwargs.get("command") or "").strip()
|
||||
timeout: int = kwargs.get("timeout", 30)
|
||||
|
||||
@@ -110,20 +102,6 @@ class BashExecTool(BaseTool):
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# --- E2B path ---
|
||||
if _is_e2b_available():
|
||||
return await self._execute_e2b(
|
||||
command, timeout, session, user_id, session_id
|
||||
)
|
||||
|
||||
# --- Bubblewrap fallback ---
|
||||
if not has_full_sandbox():
|
||||
return ErrorResponse(
|
||||
message="bash_exec requires bubblewrap sandbox (Linux only).",
|
||||
error="sandbox_unavailable",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
workspace = get_workspace_dir(session_id or "default")
|
||||
|
||||
stdout, stderr, exit_code, timed_out = await run_sandboxed(
|
||||
@@ -144,72 +122,3 @@ class BashExecTool(BaseTool):
|
||||
timed_out=timed_out,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
async def _execute_e2b(
|
||||
self,
|
||||
command: str,
|
||||
timeout: int,
|
||||
session: ChatSession,
|
||||
user_id: str | None,
|
||||
session_id: str | None,
|
||||
) -> ToolResponseBase:
|
||||
"""Execute command in e2b sandbox."""
|
||||
try:
|
||||
from backend.copilot.sdk.tool_adapter import get_sandbox_manager
|
||||
|
||||
manager = get_sandbox_manager()
|
||||
if manager is None:
|
||||
return ErrorResponse(
|
||||
message="E2B sandbox manager not available.",
|
||||
error="sandbox_unavailable",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
sandbox = await manager.get_or_create(
|
||||
session_id or "default", user_id or "anonymous"
|
||||
)
|
||||
result = await sandbox.commands.run(
|
||||
f"bash -c {shlex.quote(command)}",
|
||||
cwd=_SANDBOX_HOME,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
return BashExecResponse(
|
||||
message=f"Command executed (exit {result.exit_code})",
|
||||
stdout=result.stdout,
|
||||
stderr=result.stderr,
|
||||
exit_code=result.exit_code,
|
||||
timed_out=False,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
error_str = str(e)
|
||||
if "timeout" in error_str.lower():
|
||||
return BashExecResponse(
|
||||
message="Execution timed out",
|
||||
stdout="",
|
||||
stderr=f"Execution timed out after {timeout}s",
|
||||
exit_code=-1,
|
||||
timed_out=True,
|
||||
session_id=session_id,
|
||||
)
|
||||
return ErrorResponse(
|
||||
message=f"E2B execution failed: {e}",
|
||||
error=error_str,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Module-level helpers (placed after classes that call them)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_e2b_available() -> bool:
|
||||
"""Check if e2b sandbox is available via execution context."""
|
||||
try:
|
||||
from backend.copilot.sdk.tool_adapter import get_sandbox_manager
|
||||
|
||||
return get_sandbox_manager() is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -22,6 +22,7 @@ from .models import (
|
||||
ClarificationNeededResponse,
|
||||
ClarifyingQuestion,
|
||||
ErrorResponse,
|
||||
SuggestedGoalResponse,
|
||||
ToolResponseBase,
|
||||
)
|
||||
|
||||
@@ -186,26 +187,28 @@ class CreateAgentTool(BaseTool):
|
||||
if decomposition_result.get("type") == "unachievable_goal":
|
||||
suggested = decomposition_result.get("suggested_goal", "")
|
||||
reason = decomposition_result.get("reason", "")
|
||||
return ErrorResponse(
|
||||
return SuggestedGoalResponse(
|
||||
message=(
|
||||
f"This goal cannot be accomplished with the available blocks. "
|
||||
f"{reason} "
|
||||
f"Suggestion: {suggested}"
|
||||
f"This goal cannot be accomplished with the available blocks. {reason}"
|
||||
),
|
||||
error="unachievable_goal",
|
||||
details={"suggested_goal": suggested, "reason": reason},
|
||||
suggested_goal=suggested,
|
||||
reason=reason,
|
||||
original_goal=description,
|
||||
goal_type="unachievable",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
if decomposition_result.get("type") == "vague_goal":
|
||||
suggested = decomposition_result.get("suggested_goal", "")
|
||||
return ErrorResponse(
|
||||
message=(
|
||||
f"The goal is too vague to create a specific workflow. "
|
||||
f"Suggestion: {suggested}"
|
||||
),
|
||||
error="vague_goal",
|
||||
details={"suggested_goal": suggested},
|
||||
reason = decomposition_result.get(
|
||||
"reason", "The goal needs more specific details"
|
||||
)
|
||||
return SuggestedGoalResponse(
|
||||
message="The goal is too vague to create a specific workflow.",
|
||||
suggested_goal=suggested,
|
||||
reason=reason,
|
||||
original_goal=description,
|
||||
goal_type="vague",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Tests for CreateAgentTool response types."""
|
||||
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.copilot.tools.create_agent import CreateAgentTool
|
||||
from backend.copilot.tools.models import (
|
||||
ClarificationNeededResponse,
|
||||
ErrorResponse,
|
||||
SuggestedGoalResponse,
|
||||
)
|
||||
|
||||
from ._test_data import make_session
|
||||
|
||||
_TEST_USER_ID = "test-user-create-agent"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tool():
|
||||
return CreateAgentTool()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def session():
|
||||
return make_session(_TEST_USER_ID)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_missing_description_returns_error(tool, session):
|
||||
"""Missing description returns ErrorResponse."""
|
||||
result = await tool._execute(user_id=_TEST_USER_ID, session=session, description="")
|
||||
assert isinstance(result, ErrorResponse)
|
||||
assert result.error == "Missing description parameter"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_vague_goal_returns_suggested_goal_response(tool, session):
|
||||
"""vague_goal decomposition result returns SuggestedGoalResponse, not ErrorResponse."""
|
||||
vague_result = {
|
||||
"type": "vague_goal",
|
||||
"suggested_goal": "Monitor Twitter mentions for a specific keyword and send a daily digest email",
|
||||
}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.get_all_relevant_agents_for_generation",
|
||||
new_callable=AsyncMock,
|
||||
return_value=[],
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.decompose_goal",
|
||||
new_callable=AsyncMock,
|
||||
return_value=vague_result,
|
||||
),
|
||||
):
|
||||
result = await tool._execute(
|
||||
user_id=_TEST_USER_ID,
|
||||
session=session,
|
||||
description="monitor social media",
|
||||
)
|
||||
|
||||
assert isinstance(result, SuggestedGoalResponse)
|
||||
assert result.goal_type == "vague"
|
||||
assert result.suggested_goal == vague_result["suggested_goal"]
|
||||
assert result.original_goal == "monitor social media"
|
||||
assert result.reason == "The goal needs more specific details"
|
||||
assert not isinstance(result, ErrorResponse)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unachievable_goal_returns_suggested_goal_response(tool, session):
|
||||
"""unachievable_goal decomposition result returns SuggestedGoalResponse, not ErrorResponse."""
|
||||
unachievable_result = {
|
||||
"type": "unachievable_goal",
|
||||
"suggested_goal": "Summarize the latest news articles on a topic and send them by email",
|
||||
"reason": "There are no blocks for mind-reading.",
|
||||
}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.get_all_relevant_agents_for_generation",
|
||||
new_callable=AsyncMock,
|
||||
return_value=[],
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.decompose_goal",
|
||||
new_callable=AsyncMock,
|
||||
return_value=unachievable_result,
|
||||
),
|
||||
):
|
||||
result = await tool._execute(
|
||||
user_id=_TEST_USER_ID,
|
||||
session=session,
|
||||
description="read my mind",
|
||||
)
|
||||
|
||||
assert isinstance(result, SuggestedGoalResponse)
|
||||
assert result.goal_type == "unachievable"
|
||||
assert result.suggested_goal == unachievable_result["suggested_goal"]
|
||||
assert result.original_goal == "read my mind"
|
||||
assert result.reason == unachievable_result["reason"]
|
||||
assert not isinstance(result, ErrorResponse)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_clarifying_questions_returns_clarification_needed_response(
|
||||
tool, session
|
||||
):
|
||||
"""clarifying_questions decomposition result returns ClarificationNeededResponse."""
|
||||
clarifying_result = {
|
||||
"type": "clarifying_questions",
|
||||
"questions": [
|
||||
{
|
||||
"question": "What platform should be monitored?",
|
||||
"keyword": "platform",
|
||||
"example": "Twitter, Reddit",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.get_all_relevant_agents_for_generation",
|
||||
new_callable=AsyncMock,
|
||||
return_value=[],
|
||||
),
|
||||
patch(
|
||||
"backend.copilot.tools.create_agent.decompose_goal",
|
||||
new_callable=AsyncMock,
|
||||
return_value=clarifying_result,
|
||||
),
|
||||
):
|
||||
result = await tool._execute(
|
||||
user_id=_TEST_USER_ID,
|
||||
session=session,
|
||||
description="monitor social media and alert me",
|
||||
)
|
||||
|
||||
assert isinstance(result, ClarificationNeededResponse)
|
||||
assert len(result.questions) == 1
|
||||
assert result.questions[0].keyword == "platform"
|
||||
@@ -1,703 +0,0 @@
|
||||
"""E2B file tools — MCP tools that proxy filesystem operations to the e2b sandbox.
|
||||
|
||||
These replace the SDK built-in Read/Write/Edit/Glob/Grep tools when e2b is
|
||||
enabled, ensuring all file operations go through the sandbox VM.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import posixpath
|
||||
import shlex
|
||||
from typing import Any
|
||||
|
||||
from backend.copilot.model import ChatSession
|
||||
|
||||
from .base import BaseTool
|
||||
from .models import BashExecResponse, ErrorResponse, ToolResponseBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SANDBOX_HOME = "/home/user"
|
||||
|
||||
|
||||
class E2BReadTool(BaseTool):
|
||||
"""Read a file from the e2b sandbox filesystem."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "read_file"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Read a file from the sandbox filesystem. "
|
||||
"The sandbox is the shared working environment — files created by "
|
||||
"any tool (bash_exec, write_file, etc.) are accessible here. "
|
||||
"Returns the file content as text. "
|
||||
"Use offset and limit for large files."
|
||||
)
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Path to the file to read (relative to /home/user/ "
|
||||
"or absolute within /home/user/)."
|
||||
),
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": (
|
||||
"Line number to start reading from (0-indexed). Default: 0"
|
||||
),
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Number of lines to read. Default: 2000",
|
||||
},
|
||||
},
|
||||
"required": ["path"],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return False
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs: Any,
|
||||
) -> ToolResponseBase:
|
||||
path = kwargs.get("path", "")
|
||||
offset = kwargs.get("offset", 0)
|
||||
limit = kwargs.get("limit", 2000)
|
||||
|
||||
sandbox = await _get_sandbox(session)
|
||||
if sandbox is None:
|
||||
return _sandbox_unavailable(session)
|
||||
|
||||
resolved = _resolve_path(path)
|
||||
if resolved is None:
|
||||
return _path_error(path, session)
|
||||
|
||||
try:
|
||||
content = await sandbox.files.read(resolved)
|
||||
lines = content.splitlines(keepends=True)
|
||||
selected = lines[offset : offset + limit]
|
||||
text = "".join(selected)
|
||||
return BashExecResponse(
|
||||
message=f"Read {len(selected)} lines from {resolved}",
|
||||
stdout=text,
|
||||
stderr="",
|
||||
exit_code=0,
|
||||
timed_out=False,
|
||||
session_id=session.session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
return ErrorResponse(
|
||||
message=f"Failed to read {resolved}: {e}",
|
||||
error=str(e),
|
||||
session_id=session.session_id,
|
||||
)
|
||||
|
||||
|
||||
class E2BWriteTool(BaseTool):
|
||||
"""Write a file to the e2b sandbox filesystem."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "write_file"
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Write or create a file in the sandbox filesystem. "
|
||||
"This is the shared working environment — files are accessible "
|
||||
"to bash_exec and other tools. "
|
||||
"Creates parent directories automatically."
|
||||
)
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Path for the file (relative to /home/user/ "
|
||||
"or absolute within /home/user/)."
|
||||
),
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "Content to write to the file.",
|
||||
},
|
||||
},
|
||||
"required": ["path", "content"],
|
||||
}
|
||||
|
||||
@property
|
||||
def requires_auth(self) -> bool:
|
||||
return False
|
||||
|
||||
async def _execute(
|
||||
self,
|
||||
user_id: str | None,
|
||||
session: ChatSession,
|
||||
**kwargs: Any,
|
||||
) -> ToolResponseBase:
|
||||
path = kwargs.get("path", "")
|
||||
content = kwargs.get("content", "")
|
||||
|
||||
sandbox = await _get_sandbox(session)
|
||||
if sandbox is None:
|
||||
return _sandbox_unavailable(session)
|
||||
|
||||
resolved = _resolve_path(path)
|
||||
if resolved is None:
|
||||
return _path_error(path, session)
|
||||
|
||||
try:
|
||||
# Ensure parent directory exists
|
||||
parent = posixpath.dirname(resolved)
|
||||
if parent and parent != _SANDBOX_HOME:
|
||||
await sandbox.commands.run(f"mkdir -p {parent}", timeout=5)
|
||||
await sandbox.files.write(resolved, content)
|
||||
return BashExecResponse(
|
||||
message=f"Wrote {len(content)} bytes to {resolved}",
|
||||
stdout=f"Successfully wrote to {resolved}",
|
||||
stderr="",
|
||||
exit_code=0,
|
||||
timed_out=False,
|
||||
session_id=session.session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
return ErrorResponse(
|
||||
message=f"Failed to write {resolved}: {e}",
|
||||
error=str(e),
|
||||
session_id=session.session_id,
|
||||
)
|
||||
|
||||
|
||||
class E2BEditTool(BaseTool):
    """Search-and-replace editing of a single file inside the e2b sandbox.

    The edit is applied only when ``old_text`` occurs exactly once in the
    target file; zero or multiple occurrences yield an error response so
    the caller can refine the match.
    """

    @property
    def name(self) -> str:
        return "edit_file"

    @property
    def description(self) -> str:
        return (
            "Edit a file in the sandbox by replacing exact text. "
            "Provide old_text (the exact text to find) and new_text "
            "(what to replace it with). The old_text must match exactly."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": (
                        "Path to the file (relative to /home/user/ "
                        "or absolute within /home/user/)."
                    ),
                },
                "old_text": {
                    "type": "string",
                    "description": "Exact text to find in the file.",
                },
                "new_text": {
                    "type": "string",
                    "description": "Text to replace old_text with.",
                },
            },
            "required": ["path", "old_text", "new_text"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Apply a unique search/replace edit to one sandbox file."""
        target = kwargs.get("path", "")
        needle = kwargs.get("old_text", "")
        replacement = kwargs.get("new_text", "")

        sandbox = await _get_sandbox(session)
        if sandbox is None:
            return _sandbox_unavailable(session)

        resolved = _resolve_path(target)
        if resolved is None:
            return _path_error(target, session)

        try:
            original = await sandbox.files.read(resolved)
            hits = original.count(needle)
            # Require a unique match so the edit is unambiguous.
            if hits == 0:
                return ErrorResponse(
                    message=f"old_text not found in {resolved}",
                    error="text_not_found",
                    session_id=session.session_id,
                )
            if hits > 1:
                return ErrorResponse(
                    message=(
                        f"old_text found {hits} times in {resolved}. "
                        "Please provide more context to make the match unique."
                    ),
                    error="ambiguous_match",
                    session_id=session.session_id,
                )
            updated = original.replace(needle, replacement, 1)
            await sandbox.files.write(resolved, updated)
            return BashExecResponse(
                message=f"Edited {resolved}",
                stdout=f"Successfully edited {resolved}",
                stderr="",
                exit_code=0,
                timed_out=False,
                session_id=session.session_id,
            )
        except Exception as e:
            return ErrorResponse(
                message=f"Failed to edit {resolved}: {e}",
                error=str(e),
                session_id=session.session_id,
            )
||||
class E2BGlobTool(BaseTool):
    """List files matching a pattern in the e2b sandbox.

    Runs ``find`` inside the sandbox and returns the matching file paths
    (one per line) in the response's stdout.
    """

    @property
    def name(self) -> str:
        return "glob_files"

    @property
    def description(self) -> str:
        return (
            "List files in the sandbox matching a glob pattern. "
            "Uses find under the hood. Default directory is /home/user/."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "pattern": {
                    "type": "string",
                    "description": (
                        "Glob pattern to match (e.g., '*.py', '**/*.json')."
                    ),
                },
                "path": {
                    "type": "string",
                    "description": ("Directory to search in (default: /home/user/)."),
                },
            },
            "required": ["pattern"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Run ``find`` in the sandbox; returns matches or an error response."""
        pattern = kwargs.get("pattern", "*")
        path = kwargs.get("path", _SANDBOX_HOME)

        sandbox = await _get_sandbox(session)
        if sandbox is None:
            return _sandbox_unavailable(session)

        resolved = _resolve_path(path)
        if resolved is None:
            return _path_error(path, session)

        try:
            # Quote the directory as well as the pattern: the resolved path
            # comes from user input and may contain spaces or shell
            # metacharacters, which would otherwise split or alter the
            # find command.
            result = await sandbox.commands.run(
                f"find {shlex.quote(resolved)} -name {shlex.quote(pattern)} "
                "-type f 2>/dev/null",
                timeout=15,
            )
            return BashExecResponse(
                message="Glob results",
                stdout=result.stdout,
                stderr=result.stderr,
                exit_code=result.exit_code,
                timed_out=False,
                session_id=session.session_id,
            )
        except Exception as e:
            return ErrorResponse(
                message=f"Failed to glob: {e}",
                error=str(e),
                session_id=session.session_id,
            )
||||
class E2BGrepTool(BaseTool):
    """Search file contents in the e2b sandbox.

    Runs ``grep -rn`` inside the sandbox and returns matching lines with
    file paths and line numbers in the response's stdout.
    """

    @property
    def name(self) -> str:
        return "grep_files"

    @property
    def description(self) -> str:
        return (
            "Search for a pattern in files within the sandbox. "
            "Uses grep -rn under the hood. Returns matching lines with "
            "file paths and line numbers."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "pattern": {
                    "type": "string",
                    "description": "Search pattern (regex supported).",
                },
                "path": {
                    "type": "string",
                    "description": ("Directory to search in (default: /home/user/)."),
                },
                "include": {
                    "type": "string",
                    "description": "File glob to include (e.g., '*.py').",
                },
            },
            "required": ["pattern"],
        }

    @property
    def requires_auth(self) -> bool:
        return False

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Run ``grep`` in the sandbox; returns matches or an error response."""
        pattern = kwargs.get("pattern", "")
        path = kwargs.get("path", _SANDBOX_HOME)
        include = kwargs.get("include", "")

        sandbox = await _get_sandbox(session)
        if sandbox is None:
            return _sandbox_unavailable(session)

        resolved = _resolve_path(path)
        if resolved is None:
            return _path_error(path, session)

        include_flag = f" --include={shlex.quote(include)}" if include else ""
        try:
            # Use -e so patterns that begin with "-" are treated as
            # patterns rather than grep options, and quote the search
            # directory since it originates from user input.
            result = await sandbox.commands.run(
                f"grep -rn{include_flag} -e {shlex.quote(pattern)} "
                f"{shlex.quote(resolved)} 2>/dev/null",
                timeout=15,
            )
            return BashExecResponse(
                message="Grep results",
                stdout=result.stdout,
                stderr=result.stderr,
                exit_code=result.exit_code,
                timed_out=False,
                session_id=session.session_id,
            )
        except Exception as e:
            return ErrorResponse(
                message=f"Failed to grep: {e}",
                error=str(e),
                session_id=session.session_id,
            )
||||
class SaveToWorkspaceTool(BaseTool):
    """Copy a file from e2b sandbox to the persistent GCS workspace.

    Reads the file bytes out of the sandbox and writes them into the
    user's workspace via WorkspaceManager, so the file survives sandbox
    disposal and session end.
    """

    @property
    def name(self) -> str:
        return "save_to_workspace"

    @property
    def description(self) -> str:
        return (
            "Save a file from the sandbox to the persistent workspace "
            "(cloud storage). Files saved to workspace survive across sessions. "
            "Provide the sandbox file path and optional workspace path."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "sandbox_path": {
                    "type": "string",
                    "description": "Path of the file in the sandbox to save.",
                },
                "workspace_path": {
                    "type": "string",
                    "description": (
                        "Path in the workspace to save to "
                        "(defaults to the sandbox filename)."
                    ),
                },
            },
            "required": ["sandbox_path"],
        }

    @property
    def requires_auth(self) -> bool:
        return True

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Copy one sandbox file into the user's persistent workspace."""
        sandbox_path = kwargs.get("sandbox_path", "")
        workspace_path = kwargs.get("workspace_path", "")

        if not user_id:
            return ErrorResponse(
                message="Authentication required",
                session_id=session.session_id,
            )

        sandbox = await _get_sandbox(session)
        if sandbox is None:
            return _sandbox_unavailable(session)

        resolved = _resolve_path(sandbox_path)
        if resolved is None:
            return _path_error(sandbox_path, session)

        try:
            content_bytes = await sandbox.files.read(resolved, format="bytes")

            # Determine workspace path: default to the sandbox filename at
            # the workspace root, matching the documented parameter default.
            filename = resolved.rsplit("/", 1)[-1]
            wp = workspace_path or f"/{filename}"

            from backend.data.db_accessors import workspace_db
            from backend.util.workspace import WorkspaceManager

            workspace = await workspace_db().get_or_create_workspace(user_id)
            manager = WorkspaceManager(user_id, workspace.id, session.session_id)
            file_record = await manager.write_file(
                content=content_bytes,
                filename=filename,
                path=wp,
                overwrite=True,
            )

            return BashExecResponse(
                message=f"Saved {resolved} to workspace at {file_record.path}",
                stdout=(
                    f"Saved to workspace: {file_record.path} "
                    f"({file_record.size_bytes} bytes)"
                ),
                stderr="",
                exit_code=0,
                timed_out=False,
                session_id=session.session_id,
            )
        except Exception as e:
            return ErrorResponse(
                message=f"Failed to save to workspace: {e}",
                error=str(e),
                session_id=session.session_id,
            )
||||
class LoadFromWorkspaceTool(BaseTool):
    """Copy a file from the persistent GCS workspace into the e2b sandbox.

    Looks the file up by workspace path, reads its bytes, and writes them
    into the sandbox (creating parent directories as needed).
    """

    @property
    def name(self) -> str:
        return "load_from_workspace"

    @property
    def description(self) -> str:
        return (
            "Load a file from the persistent workspace (cloud storage) into "
            "the sandbox. Use this to bring workspace files into the sandbox "
            "for editing or processing."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "workspace_path": {
                    "type": "string",
                    "description": ("Path of the file in the workspace to load."),
                },
                "sandbox_path": {
                    "type": "string",
                    "description": (
                        "Path in the sandbox to write to "
                        "(defaults to /home/user/<filename>)."
                    ),
                },
            },
            "required": ["workspace_path"],
        }

    @property
    def requires_auth(self) -> bool:
        return True

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs: Any,
    ) -> ToolResponseBase:
        """Copy one workspace file into the session's sandbox."""
        workspace_path = kwargs.get("workspace_path", "")
        sandbox_path = kwargs.get("sandbox_path", "")

        if not user_id:
            return ErrorResponse(
                message="Authentication required",
                session_id=session.session_id,
            )

        sandbox = await _get_sandbox(session)
        if sandbox is None:
            return _sandbox_unavailable(session)

        try:
            from backend.data.db_accessors import workspace_db
            from backend.util.workspace import WorkspaceManager

            workspace = await workspace_db().get_or_create_workspace(user_id)
            manager = WorkspaceManager(user_id, workspace.id, session.session_id)
            file_info = await manager.get_file_info_by_path(workspace_path)
            if file_info is None:
                return ErrorResponse(
                    message=f"File not found in workspace: {workspace_path}",
                    session_id=session.session_id,
                )
            content = await manager.read_file_by_id(file_info.id)

            # Determine sandbox path: default to the workspace filename
            # under the sandbox home, matching the documented default.
            filename = workspace_path.rsplit("/", 1)[-1]
            target = sandbox_path or f"{_SANDBOX_HOME}/{filename}"
            resolved = _resolve_path(target)
            if resolved is None:
                return _path_error(target, session)

            # Ensure parent directory exists; quote it since the path is
            # user-influenced and may contain spaces or shell metacharacters.
            parent = posixpath.dirname(resolved)
            if parent and parent != _SANDBOX_HOME:
                await sandbox.commands.run(f"mkdir -p {shlex.quote(parent)}", timeout=5)
            await sandbox.files.write(resolved, content)

            return BashExecResponse(
                message=f"Loaded {workspace_path} into sandbox at {resolved}",
                stdout=(f"Loaded from workspace: {resolved} ({len(content)} bytes)"),
                stderr="",
                exit_code=0,
                timed_out=False,
                session_id=session.session_id,
            )
        except Exception as e:
            return ErrorResponse(
                message=f"Failed to load from workspace: {e}",
                error=str(e),
                session_id=session.session_id,
            )
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Module-level helpers (placed after functions that call them)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_path(path: str) -> str | None:
    """Resolve a path to an absolute path within /home/user/.

    Relative paths are anchored at the sandbox home; the result is
    normalized so ".." segments cannot traverse out of it.

    Args:
        path: User-supplied relative or absolute path.

    Returns:
        The normalized absolute path, or None if the path is empty or
        escapes the sandbox home.
    """
    if not path:
        return None

    # Handle relative paths
    if not path.startswith("/"):
        path = f"{_SANDBOX_HOME}/{path}"

    # Normalize to prevent traversal
    resolved = posixpath.normpath(path)

    # A bare prefix check would wrongly accept sibling directories such as
    # "/home/username"; require the home itself or a descendant of it.
    if resolved != _SANDBOX_HOME and not resolved.startswith(_SANDBOX_HOME + "/"):
        return None

    return resolved
|
||||
|
||||
|
||||
async def _get_sandbox(session: ChatSession) -> Any | None:
    """Return the session's e2b sandbox, or None when it cannot be obtained.

    Any failure (missing sandbox manager, sandbox creation/connection
    error) is logged and reported as None so callers can degrade
    gracefully instead of raising.
    """
    try:
        from backend.copilot.sdk.tool_adapter import get_sandbox_manager

        sandbox_manager = get_sandbox_manager()
        if sandbox_manager is None:
            return None
        uid, _ = _get_user_from_context()
        return await sandbox_manager.get_or_create(
            session.session_id, uid or "anonymous"
        )
    except Exception as e:
        logger.error(f"[E2B] Failed to get sandbox: {e}")
        return None
|
||||
|
||||
|
||||
def _get_user_from_context() -> tuple[str | None, Any]:
    """Return the (user_id, context) pair from the ambient execution context."""
    from backend.copilot.sdk.tool_adapter import get_execution_context

    ctx = get_execution_context()
    return ctx
|
||||
|
||||
|
||||
def _sandbox_unavailable(session: ChatSession) -> ErrorResponse:
    """Build the standard error response for a missing or unreachable sandbox."""
    response = ErrorResponse(
        message="E2B sandbox is not available. Try again or contact support.",
        error="sandbox_unavailable",
        session_id=session.session_id,
    )
    return response
|
||||
|
||||
|
||||
def _path_error(path: str, session: ChatSession) -> ErrorResponse:
    """Build the standard error response for a path outside the sandbox home."""
    response = ErrorResponse(
        message=f"Invalid path: {path}. Paths must be within /home/user/.",
        error="invalid_path",
        session_id=session.session_id,
    )
    return response
|
||||
@@ -1,215 +0,0 @@
|
||||
"""E2B sandbox manager for CoPilot sessions.
|
||||
|
||||
Manages e2b sandbox lifecycle: create, reuse via Redis, dispose with GCS sync.
|
||||
One sandbox per session, cached in-memory on the worker thread and stored in
|
||||
Redis for cross-pod reconnection.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from backend.util.settings import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_REDIS_KEY_PREFIX = "copilot:sandbox:"
|
||||
_SANDBOX_HOME = "/home/user"
|
||||
|
||||
|
||||
class CoPilotSandboxManager:
    """Manages e2b sandbox lifecycle for CoPilot sessions.

    Each session gets a single sandbox. The sandbox_id is stored in Redis
    so another pod can reconnect to it if the original pod dies. A
    background task disposes sandboxes that have been idle longer than
    the configured timeout.
    """

    def __init__(self) -> None:
        self._sandboxes: dict[str, Any] = {}  # session_id -> AsyncSandbox
        self._last_activity: dict[str, float] = {}  # session_id -> monotonic timestamp
        self._cleanup_task: asyncio.Task[None] | None = None
        config = Config()
        # Idle timeout (seconds); also used as the create-time sandbox timeout.
        self._timeout: int = config.copilot_sandbox_timeout
        self._template: str = config.copilot_sandbox_template
        self._api_key: str = config.e2b_api_key

    async def get_or_create(self, session_id: str, user_id: str) -> Any:
        """Get existing sandbox or create a new one for this session.

        Lookup order: liveness-checked in-memory cache, then a Redis-stored
        sandbox_id (cross-pod reconnect), then a freshly created sandbox.

        Args:
            session_id: CoPilot chat session ID.
            user_id: User ID for workspace sync.

        Returns:
            An e2b AsyncSandbox instance.
        """
        self._last_activity[session_id] = time.monotonic()

        # 1. Check in-memory cache
        if session_id in self._sandboxes:
            sandbox = self._sandboxes[session_id]
            if await _is_sandbox_alive(sandbox):
                return sandbox
            # Sandbox died — clean up stale reference
            del self._sandboxes[session_id]

        # 2. Check Redis for sandbox_id (cross-pod reconnection)
        sandbox = await self._try_reconnect_from_redis(session_id)
        if sandbox is not None:
            self._sandboxes[session_id] = sandbox
            return sandbox

        # 3. Create new sandbox
        sandbox = await self._create_sandbox(session_id, user_id)
        self._sandboxes[session_id] = sandbox
        await _store_sandbox_id_in_redis(session_id, sandbox.sandbox_id)

        # 4. Start cleanup task if not running
        self._ensure_cleanup_task()

        return sandbox

    async def dispose(self, session_id: str) -> None:
        """Kill the session's sandbox and remove its Redis entry.

        NOTE(review): an earlier docstring claimed workspace files are
        persisted to GCS before the kill, but no sync is performed here —
        confirm whether persistence happens elsewhere before relying on it.

        Args:
            session_id: CoPilot chat session ID.
        """
        sandbox = self._sandboxes.pop(session_id, None)
        self._last_activity.pop(session_id, None)

        if sandbox is None:
            return

        try:
            await sandbox.kill()
        except Exception as e:
            # Best-effort: the sandbox may already be gone.
            logger.warning(f"[E2B] Failed to kill sandbox for {session_id}: {e}")

        await _remove_sandbox_id_from_redis(session_id)
        logger.info(f"[E2B] Disposed sandbox for session {session_id}")

    async def dispose_all(self) -> None:
        """Dispose all sandboxes (called on processor shutdown)."""
        # Snapshot the keys: dispose() mutates self._sandboxes.
        session_ids = list(self._sandboxes.keys())
        for sid in session_ids:
            await self.dispose(sid)
        if self._cleanup_task and not self._cleanup_task.done():
            self._cleanup_task.cancel()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    async def _create_sandbox(self, session_id: str, user_id: str) -> Any:
        """Create a new e2b sandbox."""
        from e2b import AsyncSandbox

        # Only pass template/timeout when configured so e2b defaults apply.
        kwargs: dict[str, Any] = {"api_key": self._api_key}
        if self._template:
            kwargs["template"] = self._template
        if self._timeout:
            kwargs["timeout"] = self._timeout

        sandbox = await AsyncSandbox.create(**kwargs)
        logger.info(
            f"[E2B] Created sandbox {sandbox.sandbox_id} for session={session_id}, "
            f"user={user_id}"
        )
        return sandbox

    async def _try_reconnect_from_redis(self, session_id: str) -> Any | None:
        """Attempt to reconnect to a sandbox stored in Redis.

        Returns None — and clears the stale Redis entry — when the stored
        sandbox no longer accepts connections.
        """
        from e2b import AsyncSandbox

        sandbox_id = await _load_sandbox_id_from_redis(session_id)
        if not sandbox_id:
            return None

        try:
            sandbox = await AsyncSandbox.connect(
                sandbox_id=sandbox_id, api_key=self._api_key
            )
            logger.info(
                f"[E2B] Reconnected to sandbox {sandbox_id} for session={session_id}"
            )
            return sandbox
        except Exception as e:
            logger.warning(f"[E2B] Failed to reconnect to sandbox {sandbox_id}: {e}")
            await _remove_sandbox_id_from_redis(session_id)
            return None

    def _ensure_cleanup_task(self) -> None:
        """Start the idle cleanup background task if not already running."""
        if self._cleanup_task is None or self._cleanup_task.done():
            self._cleanup_task = asyncio.ensure_future(self._idle_cleanup_loop())

    async def _idle_cleanup_loop(self) -> None:
        """Periodically check for idle sandboxes and dispose them."""
        while True:
            await asyncio.sleep(60)
            if not self._sandboxes:
                continue
            now = time.monotonic()
            to_dispose: list[str] = []
            # Iterate over a snapshot: dispose() mutates _last_activity.
            for sid, last in list(self._last_activity.items()):
                if now - last > self._timeout:
                    to_dispose.append(sid)
            for sid in to_dispose:
                logger.info(f"[E2B] Disposing idle sandbox for session {sid}")
                await self.dispose(sid)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Module-level helpers (placed after classes that call them)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _is_sandbox_alive(sandbox: Any) -> bool:
    """Return True when the sandbox still responds to a trivial command."""
    try:
        probe = await sandbox.commands.run("echo ok", timeout=5)
    except Exception:
        return False
    return probe.exit_code == 0
|
||||
|
||||
|
||||
async def _store_sandbox_id_in_redis(session_id: str, sandbox_id: str) -> None:
    """Persist the sandbox_id under the session's Redis key (best-effort)."""
    try:
        from backend.data import redis as redis_client

        # TTL is at least an hour and at least double the idle timeout, so
        # the entry comfortably outlives any live sandbox.
        ttl = max(Config().copilot_sandbox_timeout * 2, 3600)
        await redis_client.get_redis().set(
            f"{_REDIS_KEY_PREFIX}{session_id}", sandbox_id, ex=ttl
        )
    except Exception as e:
        logger.warning(f"[E2B] Failed to store sandbox_id in Redis: {e}")
|
||||
|
||||
|
||||
async def _load_sandbox_id_from_redis(session_id: str) -> str | None:
    """Fetch the stored sandbox_id for this session, or None on any failure."""
    try:
        from backend.data import redis as redis_client

        raw = await redis_client.get_redis().get(f"{_REDIS_KEY_PREFIX}{session_id}")
    except Exception as e:
        logger.warning(f"[E2B] Failed to load sandbox_id from Redis: {e}")
        return None
    if isinstance(raw, bytes):
        return raw.decode()
    return raw
|
||||
|
||||
|
||||
async def _remove_sandbox_id_from_redis(session_id: str) -> None:
    """Delete the session's sandbox_id key from Redis (best-effort)."""
    try:
        from backend.data import redis as redis_client

        await redis_client.get_redis().delete(f"{_REDIS_KEY_PREFIX}{session_id}")
    except Exception as e:
        logger.warning(f"[E2B] Failed to remove sandbox_id from Redis: {e}")
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -50,6 +50,8 @@ class ResponseType(str, Enum):
|
||||
# Feature request types
|
||||
FEATURE_REQUEST_SEARCH = "feature_request_search"
|
||||
FEATURE_REQUEST_CREATED = "feature_request_created"
|
||||
# Goal refinement
|
||||
SUGGESTED_GOAL = "suggested_goal"
|
||||
|
||||
|
||||
# Base response model
|
||||
@@ -296,6 +298,22 @@ class ClarificationNeededResponse(ToolResponseBase):
|
||||
questions: list[ClarifyingQuestion] = Field(default_factory=list)
|
||||
|
||||
|
||||
class SuggestedGoalResponse(ToolResponseBase):
    """Response when the goal needs refinement with a suggested alternative.

    Emitted instead of proceeding with agent creation when the user's goal
    is too vague or cannot be accomplished as stated; carries a concrete
    rewritten goal the user can accept.
    """

    # Discriminator consumed by the frontend tool-output union.
    type: ResponseType = ResponseType.SUGGESTED_GOAL
    suggested_goal: str = Field(description="The suggested alternative goal")
    reason: str = Field(
        default="", description="Why the original goal needs refinement"
    )
    original_goal: str = Field(
        default="", description="The user's original goal for context"
    )
    goal_type: Literal["vague", "unachievable"] = Field(
        default="vague", description="Type: 'vague' or 'unachievable'"
    )
|
||||
|
||||
|
||||
# Documentation search models
|
||||
class DocSearchResult(BaseModel):
|
||||
"""A single documentation search result."""
|
||||
|
||||
@@ -39,7 +39,6 @@ class Flag(str, Enum):
|
||||
ENABLE_PLATFORM_PAYMENT = "enable-platform-payment"
|
||||
CHAT = "chat"
|
||||
COPILOT_SDK = "copilot-sdk"
|
||||
COPILOT_E2B = "copilot-e2b"
|
||||
|
||||
|
||||
def is_configured() -> bool:
|
||||
|
||||
@@ -74,8 +74,50 @@ TEXT_EXTENSIONS = {
|
||||
".tex",
|
||||
".csv",
|
||||
".log",
|
||||
".svg", # SVG is XML-based text
|
||||
}
|
||||
|
||||
# Binary file extensions we explicitly support extracting from the sandbox.
# Extensions are compared case-insensitively by the caller.
BINARY_EXTENSIONS = {
    # Images
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".ico",
    ".bmp",
    ".tiff",
    ".tif",
    # Documents
    ".pdf",
    # Archives
    ".zip",
    ".tar",
    ".gz",
    ".7z",
    # Audio
    ".mp3",
    ".wav",
    ".ogg",
    ".flac",
    # Video
    ".mp4",
    ".webm",
    ".mov",
    ".avi",
    # Fonts
    ".woff",
    ".woff2",
    ".ttf",
    ".otf",
    ".eot",
}

# Maximum file size for binary extraction (50MB).
# Files larger than this are skipped to prevent OOM from accidentally
# extracting huge files.
MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
|
||||
|
||||
|
||||
class SandboxFileOutput(BaseModel):
|
||||
"""A file extracted from a sandbox and optionally stored in workspace."""
|
||||
@@ -120,7 +162,8 @@ async def extract_sandbox_files(
|
||||
sandbox: The E2B sandbox instance
|
||||
working_directory: Directory to search for files
|
||||
since_timestamp: ISO timestamp - only return files modified after this time
|
||||
text_only: If True, only extract text files (default). If False, extract all files.
|
||||
text_only: If True, only extract text files. If False, also extract
|
||||
supported binary files (images, PDFs, etc.).
|
||||
|
||||
Returns:
|
||||
List of ExtractedFile objects with path, content, and metadata
|
||||
@@ -149,15 +192,48 @@ async def extract_sandbox_files(
|
||||
if not file_path:
|
||||
continue
|
||||
|
||||
# Check if it's a text file
|
||||
is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)
|
||||
# Check file type (case-insensitive for extensions)
|
||||
file_path_lower = file_path.lower()
|
||||
is_text = any(
|
||||
file_path_lower.endswith(ext.lower()) for ext in TEXT_EXTENSIONS
|
||||
)
|
||||
is_binary = any(
|
||||
file_path_lower.endswith(ext.lower()) for ext in BINARY_EXTENSIONS
|
||||
)
|
||||
|
||||
# Skip non-text files if text_only mode
|
||||
# Skip files with unrecognized extensions
|
||||
if not is_text and not is_binary:
|
||||
continue
|
||||
|
||||
# In text_only mode, skip binary files
|
||||
if text_only and not is_text:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Read file content as bytes
|
||||
# Check file size before reading to prevent OOM
|
||||
stat_result = await sandbox.commands.run(
|
||||
f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
|
||||
)
|
||||
if stat_result.exit_code != 0 or not stat_result.stdout:
|
||||
logger.debug(f"Skipping {file_path}: could not determine file size")
|
||||
continue
|
||||
|
||||
try:
|
||||
file_size = int(stat_result.stdout.strip())
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
f"Skipping {file_path}: unexpected stat output "
|
||||
f"{stat_result.stdout.strip()!r}"
|
||||
)
|
||||
continue
|
||||
|
||||
if file_size > MAX_BINARY_FILE_SIZE:
|
||||
logger.info(
|
||||
f"Skipping {file_path}: size {file_size} bytes "
|
||||
f"exceeds limit {MAX_BINARY_FILE_SIZE}"
|
||||
)
|
||||
continue
|
||||
|
||||
content = await sandbox.files.read(file_path, format="bytes")
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
|
||||
@@ -665,18 +665,6 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
|
||||
fal_api_key: str = Field(default="", description="FAL API key")
|
||||
exa_api_key: str = Field(default="", description="Exa API key")
|
||||
e2b_api_key: str = Field(default="", description="E2B API key")
|
||||
copilot_sandbox_timeout: int = Field(
|
||||
default=900,
|
||||
description="E2B sandbox idle timeout in seconds (default 15 min).",
|
||||
)
|
||||
copilot_sandbox_template: str = Field(
|
||||
default="",
|
||||
description="E2B sandbox template ID (empty = default template).",
|
||||
)
|
||||
copilot_use_e2b: bool = Field(
|
||||
default=False,
|
||||
description="Enable e2b sandbox for CoPilot (feature flag default).",
|
||||
)
|
||||
nvidia_api_key: str = Field(default="", description="Nvidia API key")
|
||||
mem0_api_key: str = Field(default="", description="Mem0 API key")
|
||||
elevenlabs_api_key: str = Field(default="", description="ElevenLabs API key")
|
||||
|
||||
@@ -30,6 +30,16 @@ pnpm format
|
||||
pnpm types
|
||||
```
|
||||
|
||||
### Pre-completion Checks (MANDATORY)
|
||||
|
||||
After making **any** code changes in the frontend, you MUST run the following commands **in order** before reporting work as done, creating commits, or opening PRs:
|
||||
|
||||
1. `pnpm format` — auto-fix formatting issues
|
||||
2. `pnpm lint` — check for lint errors; fix any that appear
|
||||
3. `pnpm types` — check for type errors; fix any that appear
|
||||
|
||||
Do NOT skip these steps. If any command reports errors, fix them and re-run until clean. Only then may you consider the task complete. If type-checking keeps failing, stop and ask the user.
|
||||
|
||||
### Code Style
|
||||
|
||||
- Fully capitalize acronyms in symbols, e.g. `graphID`, `useBackendAPI`
|
||||
@@ -74,3 +84,4 @@ See @CONTRIBUTING.md for complete patterns. Quick reference:
|
||||
- Do not use `useCallback` or `useMemo` unless asked to optimise a given function
|
||||
- Do not type hook returns, let Typescript infer as much as possible
|
||||
- Never type with `any` unless a variable/attribute can ACTUALLY be of any type
|
||||
- Avoid index and barrel files
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
} from "./components/ClarificationQuestionsCard";
|
||||
import sparklesImg from "./components/MiniGame/assets/sparkles.png";
|
||||
import { MiniGame } from "./components/MiniGame/MiniGame";
|
||||
import { SuggestedGoalCard } from "./components/SuggestedGoalCard";
|
||||
import {
|
||||
AccordionIcon,
|
||||
formatMaybeJson,
|
||||
@@ -38,6 +39,7 @@ import {
|
||||
isOperationInProgressOutput,
|
||||
isOperationPendingOutput,
|
||||
isOperationStartedOutput,
|
||||
isSuggestedGoalOutput,
|
||||
ToolIcon,
|
||||
truncateText,
|
||||
type CreateAgentToolOutput,
|
||||
@@ -77,6 +79,13 @@ function getAccordionMeta(output: CreateAgentToolOutput) {
|
||||
expanded: true,
|
||||
};
|
||||
}
|
||||
if (isSuggestedGoalOutput(output)) {
|
||||
return {
|
||||
icon,
|
||||
title: "Goal needs refinement",
|
||||
expanded: true,
|
||||
};
|
||||
}
|
||||
if (
|
||||
isOperationStartedOutput(output) ||
|
||||
isOperationPendingOutput(output) ||
|
||||
@@ -125,8 +134,13 @@ export function CreateAgentTool({ part }: Props) {
|
||||
isAgentPreviewOutput(output) ||
|
||||
isAgentSavedOutput(output) ||
|
||||
isClarificationNeededOutput(output) ||
|
||||
isSuggestedGoalOutput(output) ||
|
||||
isErrorOutput(output));
|
||||
|
||||
function handleUseSuggestedGoal(goal: string) {
|
||||
onSend(`Please create an agent with this goal: ${goal}`);
|
||||
}
|
||||
|
||||
function handleClarificationAnswers(answers: Record<string, string>) {
|
||||
const questions =
|
||||
output && isClarificationNeededOutput(output)
|
||||
@@ -245,6 +259,16 @@ export function CreateAgentTool({ part }: Props) {
|
||||
/>
|
||||
)}
|
||||
|
||||
{isSuggestedGoalOutput(output) && (
|
||||
<SuggestedGoalCard
|
||||
message={output.message}
|
||||
suggestedGoal={output.suggested_goal}
|
||||
reason={output.reason}
|
||||
goalType={output.goal_type ?? "vague"}
|
||||
onUseSuggestedGoal={handleUseSuggestedGoal}
|
||||
/>
|
||||
)}
|
||||
|
||||
{isErrorOutput(output) && (
|
||||
<ContentGrid>
|
||||
<ContentMessage>{output.message}</ContentMessage>
|
||||
@@ -258,6 +282,22 @@ export function CreateAgentTool({ part }: Props) {
|
||||
{formatMaybeJson(output.details)}
|
||||
</ContentCodeBlock>
|
||||
)}
|
||||
<div className="flex gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="small"
|
||||
onClick={() => onSend("Please try creating the agent again.")}
|
||||
>
|
||||
Try again
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="small"
|
||||
onClick={() => onSend("Can you help me simplify this goal?")}
|
||||
>
|
||||
Simplify goal
|
||||
</Button>
|
||||
</div>
|
||||
</ContentGrid>
|
||||
)}
|
||||
</ToolAccordion>
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
"use client";
|
||||
|
||||
import { Button } from "@/components/atoms/Button/Button";
|
||||
import { Text } from "@/components/atoms/Text/Text";
|
||||
import { ArrowRightIcon, LightbulbIcon } from "@phosphor-icons/react";
|
||||
|
||||
interface Props {
|
||||
message: string;
|
||||
suggestedGoal: string;
|
||||
reason?: string;
|
||||
goalType: string;
|
||||
onUseSuggestedGoal: (goal: string) => void;
|
||||
}
|
||||
|
||||
export function SuggestedGoalCard({
|
||||
message,
|
||||
suggestedGoal,
|
||||
reason,
|
||||
goalType,
|
||||
onUseSuggestedGoal,
|
||||
}: Props) {
|
||||
return (
|
||||
<div className="rounded-xl border border-amber-200 bg-amber-50/50 p-4">
|
||||
<div className="flex items-start gap-3">
|
||||
<LightbulbIcon
|
||||
size={20}
|
||||
weight="fill"
|
||||
className="mt-0.5 text-amber-600"
|
||||
/>
|
||||
<div className="flex-1 space-y-3">
|
||||
<div>
|
||||
<Text variant="body-medium" className="font-medium text-slate-900">
|
||||
{goalType === "unachievable"
|
||||
? "Goal cannot be accomplished"
|
||||
: "Goal needs more detail"}
|
||||
</Text>
|
||||
<Text variant="small" className="text-slate-600">
|
||||
{reason || message}
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
<div className="rounded-lg border border-amber-300 bg-white p-3">
|
||||
<Text variant="small" className="mb-1 font-semibold text-amber-800">
|
||||
Suggested alternative:
|
||||
</Text>
|
||||
<Text variant="body-medium" className="text-slate-900">
|
||||
{suggestedGoal}
|
||||
</Text>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
onClick={() => onUseSuggestedGoal(suggestedGoal)}
|
||||
variant="primary"
|
||||
>
|
||||
<span className="inline-flex items-center gap-1.5">
|
||||
Use this goal <ArrowRightIcon size={14} weight="bold" />
|
||||
</span>
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import type { OperationInProgressResponse } from "@/app/api/__generated__/models
|
||||
import type { OperationPendingResponse } from "@/app/api/__generated__/models/operationPendingResponse";
|
||||
import type { OperationStartedResponse } from "@/app/api/__generated__/models/operationStartedResponse";
|
||||
import { ResponseType } from "@/app/api/__generated__/models/responseType";
|
||||
import type { SuggestedGoalResponse } from "@/app/api/__generated__/models/suggestedGoalResponse";
|
||||
import {
|
||||
PlusCircleIcon,
|
||||
PlusIcon,
|
||||
@@ -21,6 +22,7 @@ export type CreateAgentToolOutput =
|
||||
| AgentPreviewResponse
|
||||
| AgentSavedResponse
|
||||
| ClarificationNeededResponse
|
||||
| SuggestedGoalResponse
|
||||
| ErrorResponse;
|
||||
|
||||
function parseOutput(output: unknown): CreateAgentToolOutput | null {
|
||||
@@ -43,6 +45,7 @@ function parseOutput(output: unknown): CreateAgentToolOutput | null {
|
||||
type === ResponseType.agent_preview ||
|
||||
type === ResponseType.agent_saved ||
|
||||
type === ResponseType.clarification_needed ||
|
||||
type === ResponseType.suggested_goal ||
|
||||
type === ResponseType.error
|
||||
) {
|
||||
return output as CreateAgentToolOutput;
|
||||
@@ -55,6 +58,7 @@ function parseOutput(output: unknown): CreateAgentToolOutput | null {
|
||||
if ("agent_id" in output && "library_agent_id" in output)
|
||||
return output as AgentSavedResponse;
|
||||
if ("questions" in output) return output as ClarificationNeededResponse;
|
||||
if ("suggested_goal" in output) return output as SuggestedGoalResponse;
|
||||
if ("error" in output || "details" in output)
|
||||
return output as ErrorResponse;
|
||||
}
|
||||
@@ -114,6 +118,14 @@ export function isClarificationNeededOutput(
|
||||
);
|
||||
}
|
||||
|
||||
export function isSuggestedGoalOutput(
|
||||
output: CreateAgentToolOutput,
|
||||
): output is SuggestedGoalResponse {
|
||||
return (
|
||||
output.type === ResponseType.suggested_goal || "suggested_goal" in output
|
||||
);
|
||||
}
|
||||
|
||||
export function isErrorOutput(
|
||||
output: CreateAgentToolOutput,
|
||||
): output is ErrorResponse {
|
||||
@@ -139,6 +151,7 @@ export function getAnimationText(part: {
|
||||
if (isAgentSavedOutput(output)) return `Saved ${output.agent_name}`;
|
||||
if (isAgentPreviewOutput(output)) return `Preview "${output.agent_name}"`;
|
||||
if (isClarificationNeededOutput(output)) return "Needs clarification";
|
||||
if (isSuggestedGoalOutput(output)) return "Goal needs refinement";
|
||||
return "Error creating agent";
|
||||
}
|
||||
case "output-error":
|
||||
|
||||
@@ -1052,6 +1052,7 @@
|
||||
{
|
||||
"$ref": "#/components/schemas/ClarificationNeededResponse"
|
||||
},
|
||||
{ "$ref": "#/components/schemas/SuggestedGoalResponse" },
|
||||
{ "$ref": "#/components/schemas/BlockListResponse" },
|
||||
{ "$ref": "#/components/schemas/BlockDetailsResponse" },
|
||||
{ "$ref": "#/components/schemas/BlockOutputResponse" },
|
||||
@@ -10796,7 +10797,8 @@
|
||||
"bash_exec",
|
||||
"operation_status",
|
||||
"feature_request_search",
|
||||
"feature_request_created"
|
||||
"feature_request_created",
|
||||
"suggested_goal"
|
||||
],
|
||||
"title": "ResponseType",
|
||||
"description": "Types of tool responses."
|
||||
@@ -11677,6 +11679,47 @@
|
||||
"enum": ["DRAFT", "PENDING", "APPROVED", "REJECTED"],
|
||||
"title": "SubmissionStatus"
|
||||
},
|
||||
"SuggestedGoalResponse": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/ResponseType",
|
||||
"default": "suggested_goal"
|
||||
},
|
||||
"message": { "type": "string", "title": "Message" },
|
||||
"session_id": {
|
||||
"anyOf": [{ "type": "string" }, { "type": "null" }],
|
||||
"title": "Session Id"
|
||||
},
|
||||
"suggested_goal": {
|
||||
"type": "string",
|
||||
"title": "Suggested Goal",
|
||||
"description": "The suggested alternative goal"
|
||||
},
|
||||
"reason": {
|
||||
"type": "string",
|
||||
"title": "Reason",
|
||||
"description": "Why the original goal needs refinement",
|
||||
"default": ""
|
||||
},
|
||||
"original_goal": {
|
||||
"type": "string",
|
||||
"title": "Original Goal",
|
||||
"description": "The user's original goal for context",
|
||||
"default": ""
|
||||
},
|
||||
"goal_type": {
|
||||
"type": "string",
|
||||
"enum": ["vague", "unachievable"],
|
||||
"title": "Goal Type",
|
||||
"description": "Type: 'vague' or 'unachievable'",
|
||||
"default": "vague"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["message", "suggested_goal"],
|
||||
"title": "SuggestedGoalResponse",
|
||||
"description": "Response when the goal needs refinement with a suggested alternative."
|
||||
},
|
||||
"SuggestionsResponse": {
|
||||
"properties": {
|
||||
"otto_suggestions": {
|
||||
|
||||
@@ -16,7 +16,7 @@ When activated, the block:
|
||||
- Install dependencies (npm, pip, etc.)
|
||||
- Run terminal commands
|
||||
- Build and test applications
|
||||
5. Extracts all text files created/modified during execution
|
||||
5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
|
||||
6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks
|
||||
|
||||
The block supports conversation continuation through three mechanisms:
|
||||
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
|
||||
| Output | Description |
|
||||
|--------|-------------|
|
||||
| Response | The output/response from Claude Code execution |
|
||||
| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields |
|
||||
| Files | List of files (text and binary) created/modified during execution. Includes images, PDFs, and other supported formats. Each file has path, relative_path, name, content, and workspace_ref fields. Binary files are stored in workspace and accessible via workspace_ref |
|
||||
| Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
|
||||
| Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
|
||||
| Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
|
||||
|
||||
@@ -535,7 +535,7 @@ When activated, the block:
|
||||
2. Installs the latest version of Claude Code in the sandbox
|
||||
3. Optionally runs setup commands to prepare the environment
|
||||
4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
|
||||
5. Extracts all text files created/modified during execution
|
||||
5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
|
||||
6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks
|
||||
|
||||
The block supports conversation continuation through three mechanisms:
|
||||
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
|
||||
|--------|-------------|------|
|
||||
| error | Error message if execution failed | str |
|
||||
| response | The output/response from Claude Code execution | str |
|
||||
| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
|
||||
| files | List of files created/modified by Claude Code during this execution. Includes text files and binary files (images, PDFs, etc.). Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI for workspace storage. For binary files, content contains a placeholder; use workspace_ref to access the file. | List[SandboxFileOutput] |
|
||||
| conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
|
||||
| session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
|
||||
| sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
|
||||
|
||||
Reference in New Issue
Block a user