mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-15 01:05:13 -05:00
Compare commits
1 Commits
dev
...
feat/enric
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d9975f09a |
@@ -11,45 +11,15 @@ import re
|
|||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX
|
from backend.api.features.chat.sdk.tool_adapter import (
|
||||||
|
BLOCKED_TOOLS,
|
||||||
|
DANGEROUS_PATTERNS,
|
||||||
|
MCP_TOOL_PREFIX,
|
||||||
|
WORKSPACE_SCOPED_TOOLS,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Tools that are blocked entirely (CLI/system access).
|
|
||||||
# "Bash" (capital) is the SDK built-in — it's NOT in allowed_tools but blocked
|
|
||||||
# here as defence-in-depth. The agent uses mcp__copilot__bash_exec instead,
|
|
||||||
# which has kernel-level network isolation (unshare --net).
|
|
||||||
BLOCKED_TOOLS = {
|
|
||||||
"Bash",
|
|
||||||
"bash",
|
|
||||||
"shell",
|
|
||||||
"exec",
|
|
||||||
"terminal",
|
|
||||||
"command",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Tools allowed only when their path argument stays within the SDK workspace.
|
|
||||||
# The SDK uses these to handle oversized tool results (writes to tool-results/
|
|
||||||
# files, then reads them back) and for workspace file operations.
|
|
||||||
WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
|
|
||||||
|
|
||||||
# Dangerous patterns in tool inputs
|
|
||||||
DANGEROUS_PATTERNS = [
|
|
||||||
r"sudo",
|
|
||||||
r"rm\s+-rf",
|
|
||||||
r"dd\s+if=",
|
|
||||||
r"/etc/passwd",
|
|
||||||
r"/etc/shadow",
|
|
||||||
r"chmod\s+777",
|
|
||||||
r"curl\s+.*\|.*sh",
|
|
||||||
r"wget\s+.*\|.*sh",
|
|
||||||
r"eval\s*\(",
|
|
||||||
r"exec\s*\(",
|
|
||||||
r"__import__",
|
|
||||||
r"os\.system",
|
|
||||||
r"subprocess",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def _deny(reason: str) -> dict[str, Any]:
|
def _deny(reason: str) -> dict[str, Any]:
|
||||||
"""Return a hook denial response."""
|
"""Return a hook denial response."""
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ from .response_adapter import SDKResponseAdapter
|
|||||||
from .security_hooks import create_security_hooks
|
from .security_hooks import create_security_hooks
|
||||||
from .tool_adapter import (
|
from .tool_adapter import (
|
||||||
COPILOT_TOOL_NAMES,
|
COPILOT_TOOL_NAMES,
|
||||||
|
SDK_DISALLOWED_TOOLS,
|
||||||
LongRunningCallback,
|
LongRunningCallback,
|
||||||
create_copilot_mcp_server,
|
create_copilot_mcp_server,
|
||||||
set_execution_context,
|
set_execution_context,
|
||||||
@@ -543,7 +544,7 @@ async def stream_chat_completion_sdk(
|
|||||||
"system_prompt": system_prompt,
|
"system_prompt": system_prompt,
|
||||||
"mcp_servers": {"copilot": mcp_server},
|
"mcp_servers": {"copilot": mcp_server},
|
||||||
"allowed_tools": COPILOT_TOOL_NAMES,
|
"allowed_tools": COPILOT_TOOL_NAMES,
|
||||||
"disallowed_tools": ["Bash"],
|
"disallowed_tools": SDK_DISALLOWED_TOOLS,
|
||||||
"hooks": security_hooks,
|
"hooks": security_hooks,
|
||||||
"cwd": sdk_cwd,
|
"cwd": sdk_cwd,
|
||||||
"max_buffer_size": config.claude_agent_max_buffer_size,
|
"max_buffer_size": config.claude_agent_max_buffer_size,
|
||||||
|
|||||||
@@ -310,7 +310,48 @@ def create_copilot_mcp_server():
|
|||||||
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
|
# Bash is NOT included — use the sandboxed MCP bash_exec tool instead,
|
||||||
# which provides kernel-level network isolation via unshare --net.
|
# which provides kernel-level network isolation via unshare --net.
|
||||||
# Task allows spawning sub-agents (rate-limited by security hooks).
|
# Task allows spawning sub-agents (rate-limited by security hooks).
|
||||||
_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task"]
|
# WebSearch uses Brave Search via Anthropic's API — safe, no SSRF risk.
|
||||||
|
_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Task", "WebSearch"]
|
||||||
|
|
||||||
|
# SDK built-in tools that must be explicitly blocked.
|
||||||
|
# Bash: dangerous — agent uses mcp__copilot__bash_exec with kernel-level
|
||||||
|
# network isolation (unshare --net) instead.
|
||||||
|
# WebFetch: SSRF risk — can reach internal network (localhost, 10.x, etc.).
|
||||||
|
# Agent uses the SSRF-protected mcp__copilot__web_fetch tool instead.
|
||||||
|
SDK_DISALLOWED_TOOLS = ["Bash", "WebFetch"]
|
||||||
|
|
||||||
|
# Tools that are blocked entirely in security hooks (defence-in-depth).
|
||||||
|
# Includes SDK_DISALLOWED_TOOLS plus common aliases/synonyms.
|
||||||
|
BLOCKED_TOOLS = {
|
||||||
|
*SDK_DISALLOWED_TOOLS,
|
||||||
|
"bash",
|
||||||
|
"shell",
|
||||||
|
"exec",
|
||||||
|
"terminal",
|
||||||
|
"command",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tools allowed only when their path argument stays within the SDK workspace.
|
||||||
|
# The SDK uses these to handle oversized tool results (writes to tool-results/
|
||||||
|
# files, then reads them back) and for workspace file operations.
|
||||||
|
WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
|
||||||
|
|
||||||
|
# Dangerous patterns in tool inputs
|
||||||
|
DANGEROUS_PATTERNS = [
|
||||||
|
r"sudo",
|
||||||
|
r"rm\s+-rf",
|
||||||
|
r"dd\s+if=",
|
||||||
|
r"/etc/passwd",
|
||||||
|
r"/etc/shadow",
|
||||||
|
r"chmod\s+777",
|
||||||
|
r"curl\s+.*\|.*sh",
|
||||||
|
r"wget\s+.*\|.*sh",
|
||||||
|
r"eval\s*\(",
|
||||||
|
r"exec\s*\(",
|
||||||
|
r"__import__",
|
||||||
|
r"os\.system",
|
||||||
|
r"subprocess",
|
||||||
|
]
|
||||||
|
|
||||||
# List of tool names for allowed_tools configuration
|
# List of tool names for allowed_tools configuration
|
||||||
# Include MCP tools, the MCP Read tool for oversized results,
|
# Include MCP tools, the MCP Read tool for oversized results,
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ from backend.blocks.jina._auth import (
|
|||||||
from backend.blocks.search import GetRequest
|
from backend.blocks.search import GetRequest
|
||||||
from backend.data.model import SchemaField
|
from backend.data.model import SchemaField
|
||||||
from backend.util.exceptions import BlockExecutionError
|
from backend.util.exceptions import BlockExecutionError
|
||||||
from backend.util.request import HTTPClientError, HTTPServerError, validate_url
|
|
||||||
|
|
||||||
|
|
||||||
class SearchTheWebBlock(Block, GetRequest):
|
class SearchTheWebBlock(Block, GetRequest):
|
||||||
@@ -111,12 +110,7 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
|
|||||||
self, input_data: Input, *, credentials: JinaCredentials, **kwargs
|
self, input_data: Input, *, credentials: JinaCredentials, **kwargs
|
||||||
) -> BlockOutput:
|
) -> BlockOutput:
|
||||||
if input_data.raw_content:
|
if input_data.raw_content:
|
||||||
try:
|
url = input_data.url
|
||||||
parsed_url, _, _ = await validate_url(input_data.url, [])
|
|
||||||
url = parsed_url.geturl()
|
|
||||||
except ValueError as e:
|
|
||||||
yield "error", f"Invalid URL: {e}"
|
|
||||||
return
|
|
||||||
headers = {}
|
headers = {}
|
||||||
else:
|
else:
|
||||||
url = f"https://r.jina.ai/{input_data.url}"
|
url = f"https://r.jina.ai/{input_data.url}"
|
||||||
@@ -125,20 +119,5 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
|
|||||||
"Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
|
"Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
content = await self.get_request(url, json=False, headers=headers)
|
||||||
content = await self.get_request(url, json=False, headers=headers)
|
|
||||||
except HTTPClientError as e:
|
|
||||||
yield "error", f"Client error ({e.status_code}) fetching {input_data.url}: {e}"
|
|
||||||
return
|
|
||||||
except HTTPServerError as e:
|
|
||||||
yield "error", f"Server error ({e.status_code}) fetching {input_data.url}: {e}"
|
|
||||||
return
|
|
||||||
except Exception as e:
|
|
||||||
yield "error", f"Failed to fetch {input_data.url}: {e}"
|
|
||||||
return
|
|
||||||
|
|
||||||
if not content:
|
|
||||||
yield "error", f"No content returned for {input_data.url}"
|
|
||||||
return
|
|
||||||
|
|
||||||
yield "content", content
|
yield "content", content
|
||||||
|
|||||||
@@ -1,66 +0,0 @@
|
|||||||
from typing import cast
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from backend.blocks.jina._auth import (
|
|
||||||
TEST_CREDENTIALS,
|
|
||||||
TEST_CREDENTIALS_INPUT,
|
|
||||||
JinaCredentialsInput,
|
|
||||||
)
|
|
||||||
from backend.blocks.jina.search import ExtractWebsiteContentBlock
|
|
||||||
from backend.util.request import HTTPClientError
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_extract_website_content_returns_content(monkeypatch):
|
|
||||||
block = ExtractWebsiteContentBlock()
|
|
||||||
input_data = block.Input(
|
|
||||||
url="https://example.com",
|
|
||||||
credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
|
|
||||||
raw_content=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def fake_get_request(url, json=False, headers=None):
|
|
||||||
assert url == "https://example.com"
|
|
||||||
assert headers == {}
|
|
||||||
return "page content"
|
|
||||||
|
|
||||||
monkeypatch.setattr(block, "get_request", fake_get_request)
|
|
||||||
|
|
||||||
results = [
|
|
||||||
output
|
|
||||||
async for output in block.run(
|
|
||||||
input_data=input_data, credentials=TEST_CREDENTIALS
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
assert ("content", "page content") in results
|
|
||||||
assert all(key != "error" for key, _ in results)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_extract_website_content_handles_http_error(monkeypatch):
|
|
||||||
block = ExtractWebsiteContentBlock()
|
|
||||||
input_data = block.Input(
|
|
||||||
url="https://example.com",
|
|
||||||
credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
|
|
||||||
raw_content=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def fake_get_request(url, json=False, headers=None):
|
|
||||||
raise HTTPClientError("HTTP 400 Error: Bad Request", 400)
|
|
||||||
|
|
||||||
monkeypatch.setattr(block, "get_request", fake_get_request)
|
|
||||||
|
|
||||||
results = [
|
|
||||||
output
|
|
||||||
async for output in block.run(
|
|
||||||
input_data=input_data, credentials=TEST_CREDENTIALS
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
assert ("content", "page content") not in results
|
|
||||||
error_messages = [value for key, value in results if key == "error"]
|
|
||||||
assert error_messages
|
|
||||||
assert "Client error (400)" in error_messages[0]
|
|
||||||
assert "https://example.com" in error_messages[0]
|
|
||||||
Reference in New Issue
Block a user