mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(direct_benchmark): enable shell command execution with safety denylist
Enable agents to execute shell commands during benchmarks by setting execute_local_commands=True and using denylist mode, which blocks the listed dangerous commands (rm, sudo, chmod, kill, etc.) while allowing all other commands. Also add the ExecutePython challenge to test code execution capability. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"category": [
|
||||
"coding"
|
||||
],
|
||||
"cutoff": 60,
|
||||
"dependencies": [],
|
||||
"eval_id": "execute-python-001",
|
||||
"ground": {
|
||||
"answer": "Hello, World!",
|
||||
"eval": {
|
||||
"type": "file"
|
||||
},
|
||||
"files": [
|
||||
"hello.py"
|
||||
],
|
||||
"should_contain": [
|
||||
"Hello, World!"
|
||||
],
|
||||
"should_not_contain": []
|
||||
},
|
||||
"info": {
|
||||
"description": "Tests basic code execution capability",
|
||||
"difficulty": "trivial",
|
||||
"side_effects": []
|
||||
},
|
||||
"name": "ExecutePython",
|
||||
"task": "Write a Python script called 'hello.py' that prints 'Hello, World!' to stdout. Then execute it using the shell to verify it works. The script should be in the workspace."
|
||||
}
|
||||
@@ -10,6 +10,7 @@ from typing import Callable, Optional
|
||||
from autogpt.agent_factory.configurators import create_agent
|
||||
from autogpt.agents.agent import Agent
|
||||
from autogpt.app.config import AppConfig, ConfigBuilder
|
||||
|
||||
from forge.file_storage import FileStorageBackendName, get_storage
|
||||
from forge.llm.providers import MultiProvider
|
||||
|
||||
@@ -182,6 +183,30 @@ class AgentRunner:
|
||||
llm_provider=llm_provider,
|
||||
)
|
||||
|
||||
# Enable local command execution for benchmarks
|
||||
# Use denylist mode to block dangerous commands while allowing flexibility
|
||||
if hasattr(agent, "code_executor"):
|
||||
agent.code_executor.config.execute_local_commands = True
|
||||
agent.code_executor.config.shell_command_control = "denylist"
|
||||
agent.code_executor.config.shell_denylist = [
|
||||
"rm", # Block file removal
|
||||
"sudo", # Block privilege escalation
|
||||
"chmod", # Block permission changes
|
||||
"chown", # Block ownership changes
|
||||
"mkfs", # Block filesystem creation
|
||||
"dd", # Block disk operations
|
||||
"kill", # Block process killing
|
||||
"pkill", # Block process killing
|
||||
"killall", # Block process killing
|
||||
"reboot", # Block system reboot
|
||||
"shutdown", # Block system shutdown
|
||||
"poweroff", # Block system poweroff
|
||||
"halt", # Block system halt
|
||||
"init", # Block init commands
|
||||
"systemctl", # Block systemd commands
|
||||
"service", # Block service commands
|
||||
]
|
||||
|
||||
self._agent = agent
|
||||
self._llm_provider = llm_provider
|
||||
return agent
|
||||
|
||||
Reference in New Issue
Block a user