mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-13 17:18:08 -05:00
Compare commits
9 Commits
hackathon-
...
claude-cod
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75a92fc3f3 | ||
|
|
2c6353a6a2 | ||
|
|
21b70ae9ae | ||
|
|
b007e02364 | ||
|
|
bbc289894a | ||
|
|
639ee5f073 | ||
|
|
898781134d | ||
|
|
7a28db1649 | ||
|
|
a916ea0f8f |
602
autogpt_platform/backend/backend/blocks/claude_code.py
Normal file
602
autogpt_platform/backend/backend/blocks/claude_code.py
Normal file
@@ -0,0 +1,602 @@
|
||||
import shlex
|
||||
from typing import Literal
|
||||
|
||||
from e2b import AsyncSandbox as BaseAsyncSandbox
|
||||
from pydantic import BaseModel, SecretStr
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import (
|
||||
APIKeyCredentials,
|
||||
CredentialsField,
|
||||
CredentialsMetaInput,
|
||||
SchemaField,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
|
||||
# Test credentials for E2B
|
||||
TEST_E2B_CREDENTIALS = APIKeyCredentials(
|
||||
id="01234567-89ab-cdef-0123-456789abcdef",
|
||||
provider="e2b",
|
||||
api_key=SecretStr("mock-e2b-api-key"),
|
||||
title="Mock E2B API key",
|
||||
expires_at=None,
|
||||
)
|
||||
TEST_E2B_CREDENTIALS_INPUT = {
|
||||
"provider": TEST_E2B_CREDENTIALS.provider,
|
||||
"id": TEST_E2B_CREDENTIALS.id,
|
||||
"type": TEST_E2B_CREDENTIALS.type,
|
||||
"title": TEST_E2B_CREDENTIALS.title,
|
||||
}
|
||||
|
||||
# Test credentials for Anthropic
|
||||
TEST_ANTHROPIC_CREDENTIALS = APIKeyCredentials(
|
||||
id="2e568a2b-b2ea-475a-8564-9a676bf31c56",
|
||||
provider="anthropic",
|
||||
api_key=SecretStr("mock-anthropic-api-key"),
|
||||
title="Mock Anthropic API key",
|
||||
expires_at=None,
|
||||
)
|
||||
TEST_ANTHROPIC_CREDENTIALS_INPUT = {
|
||||
"provider": TEST_ANTHROPIC_CREDENTIALS.provider,
|
||||
"id": TEST_ANTHROPIC_CREDENTIALS.id,
|
||||
"type": TEST_ANTHROPIC_CREDENTIALS.type,
|
||||
"title": TEST_ANTHROPIC_CREDENTIALS.title,
|
||||
}
|
||||
|
||||
|
||||
class ClaudeCodeBlock(Block):
|
||||
"""
|
||||
Execute tasks using Claude Code (Anthropic's AI coding assistant) in an E2B sandbox.
|
||||
|
||||
Claude Code can create files, install tools, run commands, and perform complex
|
||||
coding tasks autonomously within a secure sandbox environment.
|
||||
"""
|
||||
|
||||
# Use base template - we'll install Claude Code ourselves for latest version
|
||||
DEFAULT_TEMPLATE = "base"
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
e2b_credentials: CredentialsMetaInput[
|
||||
Literal[ProviderName.E2B], Literal["api_key"]
|
||||
] = CredentialsField(
|
||||
description=(
|
||||
"API key for the E2B platform to create the sandbox. "
|
||||
"Get one at https://e2b.dev/docs"
|
||||
),
|
||||
)
|
||||
|
||||
anthropic_credentials: CredentialsMetaInput[
|
||||
Literal[ProviderName.ANTHROPIC], Literal["api_key"]
|
||||
] = CredentialsField(
|
||||
description=(
|
||||
"API key for Anthropic to power Claude Code. "
|
||||
"Get one at https://console.anthropic.com/"
|
||||
),
|
||||
)
|
||||
|
||||
prompt: str = SchemaField(
|
||||
description=(
|
||||
"The task or instruction for Claude Code to execute. "
|
||||
"Claude Code can create files, install packages, run commands, "
|
||||
"and perform complex coding tasks."
|
||||
),
|
||||
placeholder="Create a hello world index.html file",
|
||||
default="",
|
||||
advanced=False,
|
||||
)
|
||||
|
||||
timeout: int = SchemaField(
|
||||
description=(
|
||||
"Sandbox timeout in seconds. Claude Code tasks can take "
|
||||
"a while, so set this appropriately for your task complexity. "
|
||||
"Note: This only applies when creating a new sandbox. "
|
||||
"When reconnecting to an existing sandbox via sandbox_id, "
|
||||
"the original timeout is retained."
|
||||
),
|
||||
default=300, # 5 minutes default
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
setup_commands: list[str] = SchemaField(
|
||||
description=(
|
||||
"Optional shell commands to run before executing Claude Code. "
|
||||
"Useful for installing dependencies or setting up the environment."
|
||||
),
|
||||
default_factory=list,
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
working_directory: str = SchemaField(
|
||||
description="Working directory for Claude Code to operate in.",
|
||||
default="/home/user",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Session/continuation support
|
||||
session_id: str = SchemaField(
|
||||
description=(
|
||||
"Session ID to resume a previous conversation. "
|
||||
"Leave empty for a new conversation. "
|
||||
"Use the session_id from a previous run to continue that conversation."
|
||||
),
|
||||
default="",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
sandbox_id: str = SchemaField(
|
||||
description=(
|
||||
"Sandbox ID to reconnect to an existing sandbox. "
|
||||
"Required when resuming a session (along with session_id). "
|
||||
"Use the sandbox_id from a previous run where dispose_sandbox was False."
|
||||
),
|
||||
default="",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
conversation_history: str = SchemaField(
|
||||
description=(
|
||||
"Previous conversation history to continue from. "
|
||||
"Use this to restore context on a fresh sandbox if the previous one timed out. "
|
||||
"Pass the conversation_history output from a previous run."
|
||||
),
|
||||
default="",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
dispose_sandbox: bool = SchemaField(
|
||||
description=(
|
||||
"Whether to dispose of the sandbox immediately after execution. "
|
||||
"Set to False if you want to continue the conversation later "
|
||||
"(you'll need both sandbox_id and session_id from the output)."
|
||||
),
|
||||
default=True,
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
class FileOutput(BaseModel):
|
||||
"""A file extracted from the sandbox."""
|
||||
|
||||
path: str
|
||||
relative_path: str # Path relative to working directory (for GitHub, etc.)
|
||||
name: str
|
||||
content: str
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
response: str = SchemaField(
|
||||
description="The output/response from Claude Code execution"
|
||||
)
|
||||
files: list["ClaudeCodeBlock.FileOutput"] = SchemaField(
|
||||
description=(
|
||||
"List of files created/modified by Claude Code. "
|
||||
"Each file has 'path', 'name', and 'content' fields."
|
||||
)
|
||||
)
|
||||
conversation_history: str = SchemaField(
|
||||
description=(
|
||||
"Full conversation history including this turn. "
|
||||
"Pass this to conversation_history input to continue on a fresh sandbox "
|
||||
"if the previous sandbox timed out."
|
||||
)
|
||||
)
|
||||
session_id: str = SchemaField(
|
||||
description=(
|
||||
"Session ID for this conversation. "
|
||||
"Pass this back along with sandbox_id to continue the conversation."
|
||||
)
|
||||
)
|
||||
sandbox_id: str = SchemaField(
|
||||
description=(
|
||||
"ID of the sandbox instance. "
|
||||
"Pass this back along with session_id to continue the conversation "
|
||||
"(only available if dispose_sandbox was False)."
|
||||
)
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="4e34f4a5-9b89-4326-ba77-2dd6750b7194",
|
||||
description=(
|
||||
"Execute tasks using Claude Code in an E2B sandbox. "
|
||||
"Claude Code can create files, install tools, run commands, "
|
||||
"and perform complex coding tasks autonomously."
|
||||
),
|
||||
categories={BlockCategory.DEVELOPER_TOOLS, BlockCategory.AI},
|
||||
input_schema=ClaudeCodeBlock.Input,
|
||||
output_schema=ClaudeCodeBlock.Output,
|
||||
test_credentials={
|
||||
"e2b_credentials": TEST_E2B_CREDENTIALS,
|
||||
"anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS,
|
||||
},
|
||||
test_input={
|
||||
"e2b_credentials": TEST_E2B_CREDENTIALS_INPUT,
|
||||
"anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS_INPUT,
|
||||
"prompt": "Create a hello world HTML file",
|
||||
"timeout": 300,
|
||||
"setup_commands": [],
|
||||
"working_directory": "/home/user",
|
||||
"session_id": "",
|
||||
"sandbox_id": "",
|
||||
"conversation_history": "",
|
||||
"dispose_sandbox": True,
|
||||
},
|
||||
test_output=[
|
||||
("response", "Created index.html with hello world content"),
|
||||
(
|
||||
"files",
|
||||
[
|
||||
{
|
||||
"path": "/home/user/index.html",
|
||||
"relative_path": "index.html",
|
||||
"name": "index.html",
|
||||
"content": "<html>Hello World</html>",
|
||||
}
|
||||
],
|
||||
),
|
||||
(
|
||||
"conversation_history",
|
||||
"User: Create a hello world HTML file\n"
|
||||
"Claude: Created index.html with hello world content",
|
||||
),
|
||||
("session_id", str),
|
||||
],
|
||||
test_mock={
|
||||
"execute_claude_code": lambda *args, **kwargs: (
|
||||
"Created index.html with hello world content", # response
|
||||
[
|
||||
ClaudeCodeBlock.FileOutput(
|
||||
path="/home/user/index.html",
|
||||
relative_path="index.html",
|
||||
name="index.html",
|
||||
content="<html>Hello World</html>",
|
||||
)
|
||||
], # files
|
||||
"User: Create a hello world HTML file\n"
|
||||
"Claude: Created index.html with hello world content", # conversation_history
|
||||
"test-session-id", # session_id
|
||||
"sandbox_id", # sandbox_id
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
async def execute_claude_code(
|
||||
self,
|
||||
e2b_api_key: str,
|
||||
anthropic_api_key: str,
|
||||
prompt: str,
|
||||
timeout: int,
|
||||
setup_commands: list[str],
|
||||
working_directory: str,
|
||||
session_id: str,
|
||||
existing_sandbox_id: str,
|
||||
conversation_history: str,
|
||||
dispose_sandbox: bool,
|
||||
) -> tuple[str, list["ClaudeCodeBlock.FileOutput"], str, str, str]:
|
||||
"""
|
||||
Execute Claude Code in an E2B sandbox.
|
||||
|
||||
Returns:
|
||||
Tuple of (response, files, conversation_history, session_id, sandbox_id)
|
||||
"""
|
||||
import json
|
||||
import uuid
|
||||
|
||||
# Validate that sandbox_id is provided when resuming a session
|
||||
if session_id and not existing_sandbox_id:
|
||||
raise ValueError(
|
||||
"sandbox_id is required when resuming a session with session_id. "
|
||||
"The session state is stored in the original sandbox. "
|
||||
"If the sandbox has timed out, use conversation_history instead "
|
||||
"to restore context on a fresh sandbox."
|
||||
)
|
||||
|
||||
sandbox = None
|
||||
|
||||
try:
|
||||
# Either reconnect to existing sandbox or create a new one
|
||||
if existing_sandbox_id:
|
||||
# Reconnect to existing sandbox for conversation continuation
|
||||
sandbox = await BaseAsyncSandbox.connect(
|
||||
sandbox_id=existing_sandbox_id,
|
||||
api_key=e2b_api_key,
|
||||
)
|
||||
else:
|
||||
# Create new sandbox
|
||||
sandbox = await BaseAsyncSandbox.create(
|
||||
template=self.DEFAULT_TEMPLATE,
|
||||
api_key=e2b_api_key,
|
||||
timeout=timeout,
|
||||
envs={"ANTHROPIC_API_KEY": anthropic_api_key},
|
||||
)
|
||||
|
||||
# Install Claude Code from npm (ensures we get the latest version)
|
||||
install_result = await sandbox.commands.run(
|
||||
"npm install -g @anthropic-ai/claude-code@latest",
|
||||
timeout=120, # 2 min timeout for install
|
||||
)
|
||||
if install_result.exit_code != 0:
|
||||
raise Exception(
|
||||
f"Failed to install Claude Code: {install_result.stderr}"
|
||||
)
|
||||
|
||||
# Run any user-provided setup commands
|
||||
for cmd in setup_commands:
|
||||
setup_result = await sandbox.commands.run(cmd)
|
||||
if setup_result.exit_code != 0:
|
||||
raise Exception(
|
||||
f"Setup command failed: {cmd}\n"
|
||||
f"Exit code: {setup_result.exit_code}\n"
|
||||
f"Stdout: {setup_result.stdout}\n"
|
||||
f"Stderr: {setup_result.stderr}"
|
||||
)
|
||||
|
||||
# Generate or use provided session ID
|
||||
current_session_id = session_id if session_id else str(uuid.uuid4())
|
||||
|
||||
# Build base Claude flags
|
||||
base_flags = "-p --dangerously-skip-permissions --output-format json"
|
||||
|
||||
# Add conversation history context if provided (for fresh sandbox continuation)
|
||||
history_flag = ""
|
||||
if conversation_history and not session_id:
|
||||
# Inject previous conversation as context via system prompt
|
||||
# Use consistent escaping via _escape_prompt helper
|
||||
escaped_history = self._escape_prompt(
|
||||
f"Previous conversation context: {conversation_history}"
|
||||
)
|
||||
history_flag = f" --append-system-prompt {escaped_history}"
|
||||
|
||||
# Build Claude command based on whether we're resuming or starting new
|
||||
# Use shlex.quote for working_directory and session IDs to prevent injection
|
||||
safe_working_dir = shlex.quote(working_directory)
|
||||
if session_id:
|
||||
# Resuming existing session (sandbox still alive)
|
||||
safe_session_id = shlex.quote(session_id)
|
||||
claude_command = (
|
||||
f"cd {safe_working_dir} && "
|
||||
f"echo {self._escape_prompt(prompt)} | "
|
||||
f"claude --resume {safe_session_id} {base_flags}"
|
||||
)
|
||||
else:
|
||||
# New session with specific ID
|
||||
safe_current_session_id = shlex.quote(current_session_id)
|
||||
claude_command = (
|
||||
f"cd {safe_working_dir} && "
|
||||
f"echo {self._escape_prompt(prompt)} | "
|
||||
f"claude --session-id {safe_current_session_id} {base_flags}{history_flag}"
|
||||
)
|
||||
|
||||
result = await sandbox.commands.run(
|
||||
claude_command,
|
||||
timeout=0, # No command timeout - let sandbox timeout handle it
|
||||
)
|
||||
|
||||
# Check for command failure
|
||||
if result.exit_code != 0:
|
||||
error_msg = result.stderr or result.stdout or "Unknown error"
|
||||
raise Exception(
|
||||
f"Claude Code command failed with exit code {result.exit_code}:\n"
|
||||
f"{error_msg}"
|
||||
)
|
||||
|
||||
raw_output = result.stdout or ""
|
||||
sandbox_id = sandbox.sandbox_id
|
||||
|
||||
# Parse JSON output to extract response and build conversation history
|
||||
response = ""
|
||||
new_conversation_history = conversation_history or ""
|
||||
|
||||
try:
|
||||
# The JSON output contains the result
|
||||
output_data = json.loads(raw_output)
|
||||
response = output_data.get("result", raw_output)
|
||||
|
||||
# Build conversation history entry
|
||||
turn_entry = f"User: {prompt}\nClaude: {response}"
|
||||
if new_conversation_history:
|
||||
new_conversation_history = (
|
||||
f"{new_conversation_history}\n\n{turn_entry}"
|
||||
)
|
||||
else:
|
||||
new_conversation_history = turn_entry
|
||||
|
||||
except json.JSONDecodeError:
|
||||
# If not valid JSON, use raw output
|
||||
response = raw_output
|
||||
turn_entry = f"User: {prompt}\nClaude: {response}"
|
||||
if new_conversation_history:
|
||||
new_conversation_history = (
|
||||
f"{new_conversation_history}\n\n{turn_entry}"
|
||||
)
|
||||
else:
|
||||
new_conversation_history = turn_entry
|
||||
|
||||
# Extract files from the working directory
|
||||
files = await self._extract_files(sandbox, working_directory)
|
||||
|
||||
return (
|
||||
response,
|
||||
files,
|
||||
new_conversation_history,
|
||||
current_session_id,
|
||||
sandbox_id,
|
||||
)
|
||||
|
||||
finally:
|
||||
if dispose_sandbox and sandbox:
|
||||
await sandbox.kill()
|
||||
|
||||
async def _extract_files(
|
||||
self,
|
||||
sandbox: BaseAsyncSandbox,
|
||||
working_directory: str,
|
||||
) -> list["ClaudeCodeBlock.FileOutput"]:
|
||||
"""
|
||||
Extract files from the sandbox working directory.
|
||||
|
||||
Returns:
|
||||
List of FileOutput objects with path, name, and content
|
||||
"""
|
||||
files: list[ClaudeCodeBlock.FileOutput] = []
|
||||
|
||||
# Text file extensions we can safely read
|
||||
text_extensions = {
|
||||
".txt",
|
||||
".md",
|
||||
".html",
|
||||
".htm",
|
||||
".css",
|
||||
".js",
|
||||
".ts",
|
||||
".jsx",
|
||||
".tsx",
|
||||
".json",
|
||||
".xml",
|
||||
".yaml",
|
||||
".yml",
|
||||
".toml",
|
||||
".ini",
|
||||
".cfg",
|
||||
".conf",
|
||||
".py",
|
||||
".rb",
|
||||
".php",
|
||||
".java",
|
||||
".c",
|
||||
".cpp",
|
||||
".h",
|
||||
".hpp",
|
||||
".cs",
|
||||
".go",
|
||||
".rs",
|
||||
".swift",
|
||||
".kt",
|
||||
".scala",
|
||||
".sh",
|
||||
".bash",
|
||||
".zsh",
|
||||
".sql",
|
||||
".graphql",
|
||||
".env",
|
||||
".gitignore",
|
||||
".dockerfile",
|
||||
"Dockerfile",
|
||||
".vue",
|
||||
".svelte",
|
||||
".astro",
|
||||
".mdx",
|
||||
".rst",
|
||||
".tex",
|
||||
".csv",
|
||||
".log",
|
||||
}
|
||||
|
||||
try:
|
||||
# List files recursively using find command
|
||||
# Exclude node_modules and .git directories, but allow hidden files
|
||||
# like .env and .gitignore (they're filtered by text_extensions later)
|
||||
safe_working_dir = shlex.quote(working_directory)
|
||||
find_result = await sandbox.commands.run(
|
||||
f"find {safe_working_dir} -type f "
|
||||
f"-not -path '*/node_modules/*' "
|
||||
f"-not -path '*/.git/*' "
|
||||
f"2>/dev/null | head -100"
|
||||
)
|
||||
|
||||
if find_result.stdout:
|
||||
for file_path in find_result.stdout.strip().split("\n"):
|
||||
if not file_path:
|
||||
continue
|
||||
|
||||
# Check if it's a text file we can read
|
||||
is_text = any(
|
||||
file_path.endswith(ext) for ext in text_extensions
|
||||
) or file_path.endswith("Dockerfile")
|
||||
|
||||
if is_text:
|
||||
try:
|
||||
content = await sandbox.files.read(file_path)
|
||||
# Handle bytes or string
|
||||
if isinstance(content, bytes):
|
||||
content = content.decode("utf-8", errors="replace")
|
||||
|
||||
# Extract filename from path
|
||||
file_name = file_path.split("/")[-1]
|
||||
|
||||
# Calculate relative path by stripping working directory
|
||||
relative_path = file_path
|
||||
if file_path.startswith(working_directory):
|
||||
relative_path = file_path[len(working_directory) :]
|
||||
# Remove leading slash if present
|
||||
if relative_path.startswith("/"):
|
||||
relative_path = relative_path[1:]
|
||||
|
||||
files.append(
|
||||
ClaudeCodeBlock.FileOutput(
|
||||
path=file_path,
|
||||
relative_path=relative_path,
|
||||
name=file_name,
|
||||
content=content,
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
# Skip files that can't be read
|
||||
pass
|
||||
|
||||
except Exception:
|
||||
# If file extraction fails, return empty results
|
||||
pass
|
||||
|
||||
return files
|
||||
|
||||
def _escape_prompt(self, prompt: str) -> str:
|
||||
"""Escape the prompt for safe shell execution."""
|
||||
# Use single quotes and escape any single quotes in the prompt
|
||||
escaped = prompt.replace("'", "'\"'\"'")
|
||||
return f"'{escaped}'"
|
||||
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
e2b_credentials: APIKeyCredentials,
|
||||
anthropic_credentials: APIKeyCredentials,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
try:
|
||||
(
|
||||
response,
|
||||
files,
|
||||
conversation_history,
|
||||
session_id,
|
||||
sandbox_id,
|
||||
) = await self.execute_claude_code(
|
||||
e2b_api_key=e2b_credentials.api_key.get_secret_value(),
|
||||
anthropic_api_key=anthropic_credentials.api_key.get_secret_value(),
|
||||
prompt=input_data.prompt,
|
||||
timeout=input_data.timeout,
|
||||
setup_commands=input_data.setup_commands,
|
||||
working_directory=input_data.working_directory,
|
||||
session_id=input_data.session_id,
|
||||
existing_sandbox_id=input_data.sandbox_id,
|
||||
conversation_history=input_data.conversation_history,
|
||||
dispose_sandbox=input_data.dispose_sandbox,
|
||||
)
|
||||
|
||||
yield "response", response
|
||||
# Always yield files (empty list if none) to match Output schema
|
||||
yield "files", [f.model_dump() for f in files]
|
||||
# Always yield conversation_history so user can restore context on fresh sandbox
|
||||
yield "conversation_history", conversation_history
|
||||
# Always yield session_id so user can continue conversation
|
||||
yield "session_id", session_id
|
||||
if not input_data.dispose_sandbox and sandbox_id:
|
||||
yield "sandbox_id", sandbox_id
|
||||
|
||||
except Exception as e:
|
||||
yield "error", str(e)
|
||||
Reference in New Issue
Block a user