AutoGPT/autogpt_platform/backend/backend/blocks/claude_code.py

import json
import shlex
import uuid
from typing import TYPE_CHECKING, Literal, Optional

from e2b import AsyncSandbox as BaseAsyncSandbox
from pydantic import SecretStr

from backend.blocks._base import (
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchemaInput,
    BlockSchemaOutput,
)
from backend.data.model import (
    APIKeyCredentials,
    CredentialsField,
    CredentialsMetaInput,
    SchemaField,
)
from backend.integrations.providers import ProviderName
from backend.util.sandbox_files import (
    SandboxFileOutput,
    extract_and_store_sandbox_files,
)

if TYPE_CHECKING:
    from backend.executor.utils import ExecutionContext


class ClaudeCodeExecutionError(Exception):
    """Exception raised when Claude Code execution fails.

    Carries the sandbox_id so it can be returned to the user for cleanup
    when dispose_sandbox=False.
    """

    def __init__(self, message: str, sandbox_id: str = ""):
        super().__init__(message)
        self.sandbox_id = sandbox_id


# Test credentials for E2B
TEST_E2B_CREDENTIALS = APIKeyCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="e2b",
    api_key=SecretStr("mock-e2b-api-key"),
    title="Mock E2B API key",
    expires_at=None,
)
TEST_E2B_CREDENTIALS_INPUT = {
    "provider": TEST_E2B_CREDENTIALS.provider,
    "id": TEST_E2B_CREDENTIALS.id,
    "type": TEST_E2B_CREDENTIALS.type,
    "title": TEST_E2B_CREDENTIALS.title,
}

# Test credentials for Anthropic
TEST_ANTHROPIC_CREDENTIALS = APIKeyCredentials(
    id="2e568a2b-b2ea-475a-8564-9a676bf31c56",
    provider="anthropic",
    api_key=SecretStr("mock-anthropic-api-key"),
    title="Mock Anthropic API key",
    expires_at=None,
)
TEST_ANTHROPIC_CREDENTIALS_INPUT = {
    "provider": TEST_ANTHROPIC_CREDENTIALS.provider,
    "id": TEST_ANTHROPIC_CREDENTIALS.id,
    "type": TEST_ANTHROPIC_CREDENTIALS.type,
    "title": TEST_ANTHROPIC_CREDENTIALS.title,
}


class ClaudeCodeBlock(Block):
    """
    Execute tasks using Claude Code (Anthropic's AI coding assistant) in an E2B sandbox.

    Claude Code can create files, install tools, run commands, and perform complex
    coding tasks autonomously within a secure sandbox environment.
    """

    # Use base template - we'll install Claude Code ourselves for latest version
    DEFAULT_TEMPLATE = "base"

    class Input(BlockSchemaInput):
        e2b_credentials: CredentialsMetaInput[
            Literal[ProviderName.E2B], Literal["api_key"]
        ] = CredentialsField(
            description=(
                "API key for the E2B platform to create the sandbox. "
                "Get one on the [e2b website](https://e2b.dev/docs)"
            ),
        )

        anthropic_credentials: CredentialsMetaInput[
            Literal[ProviderName.ANTHROPIC], Literal["api_key"]
        ] = CredentialsField(
            description=(
                "API key for Anthropic to power Claude Code. "
                "Get one at [Anthropic's website](https://console.anthropic.com)"
            ),
        )

        prompt: str = SchemaField(
            description=(
                "The task or instruction for Claude Code to execute. "
                "Claude Code can create files, install packages, run commands, "
                "and perform complex coding tasks."
            ),
            placeholder="Create a hello world index.html file",
            default="",
            advanced=False,
        )

        timeout: int = SchemaField(
            description=(
                "Sandbox timeout in seconds. Claude Code tasks can take "
                "a while, so set this appropriately for your task complexity. "
                "Note: This only applies when creating a new sandbox. "
                "When reconnecting to an existing sandbox via sandbox_id, "
                "the original timeout is retained."
            ),
            default=300,  # 5 minutes default
            advanced=True,
        )

        setup_commands: list[str] = SchemaField(
            description=(
                "Optional shell commands to run before executing Claude Code. "
                "Useful for installing dependencies or setting up the environment."
            ),
            default_factory=list,
            advanced=True,
        )

        working_directory: str = SchemaField(
            description="Working directory for Claude Code to operate in.",
            default="/home/user",
            advanced=True,
        )

        # Session/continuation support
        session_id: str = SchemaField(
            description=(
                "Session ID to resume a previous conversation. "
                "Leave empty for a new conversation. "
                "Use the session_id from a previous run to continue that conversation."
            ),
            default="",
            advanced=True,
        )

        sandbox_id: str = SchemaField(
            description=(
                "Sandbox ID to reconnect to an existing sandbox. "
                "Required when resuming a session (along with session_id). "
                "Use the sandbox_id from a previous run where dispose_sandbox was False."
            ),
            default="",
            advanced=True,
        )

        conversation_history: str = SchemaField(
            description=(
                "Previous conversation history to continue from. "
                "Use this to restore context on a fresh sandbox if the previous one timed out. "
                "Pass the conversation_history output from a previous run."
            ),
            default="",
            advanced=True,
        )

        dispose_sandbox: bool = SchemaField(
            description=(
                "Whether to dispose of the sandbox immediately after execution. "
                "Set to False if you want to continue the conversation later "
                "(you'll need both sandbox_id and session_id from the output)."
            ),
            default=True,
            advanced=True,
        )

    class Output(BlockSchemaOutput):
        response: str = SchemaField(
            description="The output/response from Claude Code execution"
        )
        files: list[SandboxFileOutput] = SchemaField(
            description=(
                "List of text files created/modified by Claude Code during this execution. "
                "Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
                "workspace_ref contains a workspace:// URI if the file was stored to workspace."
            )
        )
        conversation_history: str = SchemaField(
            description=(
                "Full conversation history including this turn. "
                "Pass this to conversation_history input to continue on a fresh sandbox "
                "if the previous sandbox timed out."
            )
        )
        session_id: str = SchemaField(
            description=(
                "Session ID for this conversation. "
                "Pass this back along with sandbox_id to continue the conversation."
            )
        )
        sandbox_id: Optional[str] = SchemaField(
            description=(
                "ID of the sandbox instance. "
                "Pass this back along with session_id to continue the conversation. "
                "This is None if dispose_sandbox was True (sandbox was disposed)."
            ),
            default=None,
        )
        error: str = SchemaField(description="Error message if execution failed")

    def __init__(self):
        super().__init__(
            id="4e34f4a5-9b89-4326-ba77-2dd6750b7194",
            description=(
                "Execute tasks using Claude Code in an E2B sandbox. "
                "Claude Code can create files, install tools, run commands, "
                "and perform complex coding tasks autonomously."
            ),
            categories={BlockCategory.DEVELOPER_TOOLS, BlockCategory.AI},
            input_schema=ClaudeCodeBlock.Input,
            output_schema=ClaudeCodeBlock.Output,
            test_credentials={
                "e2b_credentials": TEST_E2B_CREDENTIALS,
                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS,
            },
            test_input={
                "e2b_credentials": TEST_E2B_CREDENTIALS_INPUT,
                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS_INPUT,
                "prompt": "Create a hello world HTML file",
                "timeout": 300,
                "setup_commands": [],
                "working_directory": "/home/user",
                "session_id": "",
                "sandbox_id": "",
                "conversation_history": "",
                "dispose_sandbox": True,
            },
            test_output=[
                ("response", "Created index.html with hello world content"),
                (
                    "files",
                    [
                        {
                            "path": "/home/user/index.html",
                            "relative_path": "index.html",
                            "name": "index.html",
                            "content": "<html>Hello World</html>",
                            "workspace_ref": None,
                        }
                    ],
                ),
                (
                    "conversation_history",
                    "User: Create a hello world HTML file\n"
                    "Claude: Created index.html with hello world content",
                ),
                ("session_id", str),
                ("sandbox_id", None),  # None because dispose_sandbox=True in test_input
            ],
            test_mock={
                "execute_claude_code": lambda *args, **kwargs: (
                    "Created index.html with hello world content",  # response
                    [
                        SandboxFileOutput(
                            path="/home/user/index.html",
                            relative_path="index.html",
                            name="index.html",
                            content="<html>Hello World</html>",
                            workspace_ref=None,
                        )
                    ],  # files
                    "User: Create a hello world HTML file\n"
                    "Claude: Created index.html with hello world content",  # conversation_history
                    "test-session-id",  # session_id
                    "sandbox_id",  # sandbox_id
                ),
            },
        )

    async def execute_claude_code(
        self,
        e2b_api_key: str,
        anthropic_api_key: str,
        prompt: str,
        timeout: int,
        setup_commands: list[str],
        working_directory: str,
        session_id: str,
        existing_sandbox_id: str,
        conversation_history: str,
        dispose_sandbox: bool,
        execution_context: "ExecutionContext",
    ) -> tuple[str, list[SandboxFileOutput], str, str, str]:
        """
        Execute Claude Code in an E2B sandbox.

        Returns:
            Tuple of (response, files, conversation_history, session_id, sandbox_id)
        """

        # Validate that sandbox_id is provided when resuming a session
        if session_id and not existing_sandbox_id:
            raise ValueError(
                "sandbox_id is required when resuming a session with session_id. "
                "The session state is stored in the original sandbox. "
                "If the sandbox has timed out, use conversation_history instead "
                "to restore context on a fresh sandbox."
            )

        sandbox = None
        sandbox_id = ""

        try:
            # Either reconnect to existing sandbox or create a new one
            if existing_sandbox_id:
                # Reconnect to existing sandbox for conversation continuation
                sandbox = await BaseAsyncSandbox.connect(
                    sandbox_id=existing_sandbox_id,
                    api_key=e2b_api_key,
                )
            else:
                # Create new sandbox
                sandbox = await BaseAsyncSandbox.create(
                    template=self.DEFAULT_TEMPLATE,
                    api_key=e2b_api_key,
                    timeout=timeout,
                    envs={"ANTHROPIC_API_KEY": anthropic_api_key},
                )

                # Install Claude Code from npm (ensures we get the latest version)
                install_result = await sandbox.commands.run(
                    "npm install -g @anthropic-ai/claude-code@latest",
                    timeout=120,  # 2 min timeout for install
                )
                if install_result.exit_code != 0:
                    raise Exception(
                        f"Failed to install Claude Code: {install_result.stderr}"
                    )

                # Run any user-provided setup commands
                for cmd in setup_commands:
                    setup_result = await sandbox.commands.run(cmd)
                    if setup_result.exit_code != 0:
                        raise Exception(
                            f"Setup command failed: {cmd}\n"
                            f"Exit code: {setup_result.exit_code}\n"
                            f"Stdout: {setup_result.stdout}\n"
                            f"Stderr: {setup_result.stderr}"
                        )

            # Capture sandbox_id immediately after creation/connection
            # so it's available for error recovery if dispose_sandbox=False
            sandbox_id = sandbox.sandbox_id

            # Generate or use provided session ID
            current_session_id = session_id if session_id else str(uuid.uuid4())

            # Build base Claude flags
            base_flags = "-p --dangerously-skip-permissions --output-format json"

            # Add conversation history context if provided (for fresh sandbox continuation)
            history_flag = ""
            if conversation_history and not session_id:
                # Inject previous conversation as context via system prompt
                # Use consistent escaping via _escape_prompt helper
                escaped_history = self._escape_prompt(
                    f"Previous conversation context: {conversation_history}"
                )
                history_flag = f" --append-system-prompt {escaped_history}"

            # Build Claude command based on whether we're resuming or starting new
            # Use shlex.quote for working_directory and session IDs to prevent injection
            safe_working_dir = shlex.quote(working_directory)
            if session_id:
                # Resuming existing session (sandbox still alive)
                safe_session_id = shlex.quote(session_id)
                claude_command = (
                    f"cd {safe_working_dir} && "
                    f"echo {self._escape_prompt(prompt)} | "
                    f"claude --resume {safe_session_id} {base_flags}"
                )
            else:
                # New session with specific ID
                safe_current_session_id = shlex.quote(current_session_id)
                claude_command = (
                    f"cd {safe_working_dir} && "
                    f"echo {self._escape_prompt(prompt)} | "
                    f"claude --session-id {safe_current_session_id} {base_flags}{history_flag}"
                )

            # Capture timestamp before running Claude Code to filter files later
            # Capture timestamp 1 second in the past to avoid race condition with file creation
            timestamp_result = await sandbox.commands.run(
                "date -u -d '1 second ago' +%Y-%m-%dT%H:%M:%S"
            )
            if timestamp_result.exit_code != 0:
                raise RuntimeError(
                    f"Failed to capture timestamp: {timestamp_result.stderr}"
                )
            start_timestamp = (
                timestamp_result.stdout.strip() if timestamp_result.stdout else None
            )

            result = await sandbox.commands.run(
                claude_command,
                timeout=0,  # No command timeout - let sandbox timeout handle it
            )

            # Check for command failure
            if result.exit_code != 0:
                error_msg = result.stderr or result.stdout or "Unknown error"
                raise Exception(
                    f"Claude Code command failed with exit code {result.exit_code}:\n"
                    f"{error_msg}"
                )

            raw_output = result.stdout or ""

            # Parse JSON output to extract response and build conversation history
            response = ""
            new_conversation_history = conversation_history or ""

            try:
                # The JSON output contains the result
                output_data = json.loads(raw_output)
                response = output_data.get("result", raw_output)

                # Build conversation history entry
                turn_entry = f"User: {prompt}\nClaude: {response}"
                if new_conversation_history:
                    new_conversation_history = (
                        f"{new_conversation_history}\n\n{turn_entry}"
                    )
                else:
                    new_conversation_history = turn_entry

            except json.JSONDecodeError:
                # If not valid JSON, use raw output
                response = raw_output
                turn_entry = f"User: {prompt}\nClaude: {response}"
                if new_conversation_history:
                    new_conversation_history = (
                        f"{new_conversation_history}\n\n{turn_entry}"
                    )
                else:
                    new_conversation_history = turn_entry

            # Extract files created/modified during this run and store to workspace
            sandbox_files = await extract_and_store_sandbox_files(
                sandbox=sandbox,
                working_directory=working_directory,
                execution_context=execution_context,
                since_timestamp=start_timestamp,
                text_only=True,
            )

            return (
                response,
                sandbox_files,  # Already SandboxFileOutput objects
                new_conversation_history,
                current_session_id,
                sandbox_id,
            )

        except Exception as e:
            # Wrap exception with sandbox_id so caller can access/cleanup
            # the preserved sandbox when dispose_sandbox=False
            raise ClaudeCodeExecutionError(str(e), sandbox_id) from e

        finally:
            if dispose_sandbox and sandbox:
                await sandbox.kill()

    def _escape_prompt(self, prompt: str) -> str:
        """Escape the prompt for safe shell execution."""
        # Use single quotes and escape any single quotes in the prompt
        escaped = prompt.replace("'", "'\"'\"'")
        return f"'{escaped}'"

    async def run(
        self,
        input_data: Input,
        *,
        e2b_credentials: APIKeyCredentials,
        anthropic_credentials: APIKeyCredentials,
        execution_context: "ExecutionContext",
        **kwargs,
    ) -> BlockOutput:
        try:
            (
                response,
                files,
                conversation_history,
                session_id,
                sandbox_id,
            ) = await self.execute_claude_code(
                e2b_api_key=e2b_credentials.api_key.get_secret_value(),
                anthropic_api_key=anthropic_credentials.api_key.get_secret_value(),
                prompt=input_data.prompt,
                timeout=input_data.timeout,
                setup_commands=input_data.setup_commands,
                working_directory=input_data.working_directory,
                session_id=input_data.session_id,
                existing_sandbox_id=input_data.sandbox_id,
                conversation_history=input_data.conversation_history,
                dispose_sandbox=input_data.dispose_sandbox,
                execution_context=execution_context,
            )

            yield "response", response
            # Always yield files (empty list if none) to match Output schema
            yield "files", [f.model_dump() for f in files]
            # Always yield conversation_history so user can restore context on fresh sandbox
            yield "conversation_history", conversation_history
            # Always yield session_id so user can continue conversation
            yield "session_id", session_id
            # Always yield sandbox_id (None if disposed) to match Output schema
            yield "sandbox_id", sandbox_id if not input_data.dispose_sandbox else None

        except ClaudeCodeExecutionError as e:
            yield "error", str(e)
            # If sandbox was preserved (dispose_sandbox=False), yield sandbox_id
            # so user can reconnect to or clean up the orphaned sandbox
            if not input_data.dispose_sandbox and e.sandbox_id:
                yield "sandbox_id", e.sandbox_id
        except Exception as e:
            yield "error", str(e)