feat(forge): Component-specific configuration (#7170)

Remove many env vars and use component-level configuration, which can be loaded from a file, instead.

### Changed

- `BaseAgent` provides `serialize_configs` and `deserialize_configs`, which save and load all component configuration as a JSON `str`. Deserialized components/values overwrite existing values, so not all values need to be present in the serialized config.
- Decoupled `forge/content_processing/text.py` from `Config`
- Kept `execute_local_commands` in `Config` because it's needed to know if OS info should be included in the prompt
- Updated docs to reflect changes
- Renamed `Config` to `AppConfig`

### Added

- Added `ConfigurableComponent` class for components and following configs:
  - `ActionHistoryConfiguration`
  - `CodeExecutorConfiguration`
  - `FileManagerConfiguration` - the file manager now allows multiple agents to use the same workspace
  - `GitOperationsConfiguration`
  - `ImageGeneratorConfiguration`
  - `WebSearchConfiguration`
  - `WebSeleniumConfiguration`
- `BaseConfig` in `forge` and moved `Config` (now inherits from `BaseConfig`) back to `autogpt`
- Required `config_class` attribute for the `ConfigurableComponent` class, which should be set to the configuration class of the component
- `--component-config-file` CLI option, `COMPONENT_CONFIG_FILE` env var, and corresponding field in `Config`. This option allows loading configuration from a specific file; the CLI option takes precedence over the env var.
- Added comments to config models

### Removed

- Unused `change_agent_id` method from `FileManagerComponent`
- Unused `allow_downloads` from `Config` and CLI options (it should be in web component config if needed)
- CLI option `--browser-name` (the option is inside `WebSeleniumConfiguration`)
- Unused `workspace_directory` from CLI options
- No longer needed variables from `Config` and docs
- Unused fields from `Config`: `image_size`, `audio_to_text_provider`, `huggingface_audio_to_text_model`
- Removed `files` and `workspace` class attributes from `FileManagerComponent`
This commit is contained in:
Krzysztof Czerwinski
2024-06-19 09:14:01 +01:00
committed by GitHub
parent 02dc198a9f
commit c19ab2b24f
47 changed files with 772 additions and 722 deletions

View File

@@ -15,8 +15,8 @@
## This helps us to spot and solve problems earlier & faster. (Default: DISABLED)
# TELEMETRY_OPT_IN=true
## EXECUTE_LOCAL_COMMANDS - Allow local command execution (Default: False)
# EXECUTE_LOCAL_COMMANDS=False
## COMPONENT_CONFIG_FILE - Path to the json config file (Default: None)
# COMPONENT_CONFIG_FILE=
### Workspace ###
@@ -44,9 +44,6 @@
### Miscellaneous ###
## USER_AGENT - Define the user-agent used by the requests library to browse website (string)
# USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
## AUTHORISE COMMAND KEY - Key to authorise commands
# AUTHORISE_COMMAND_KEY=y
@@ -96,38 +93,12 @@
## EMBEDDING_MODEL - Model to use for creating embeddings
# EMBEDDING_MODEL=text-embedding-3-small
################################################################################
### SHELL EXECUTION
################################################################################
## SHELL_COMMAND_CONTROL - Whether to use "allowlist" or "denylist" to determine what shell commands can be executed (Default: denylist)
# SHELL_COMMAND_CONTROL=denylist
## ONLY if SHELL_COMMAND_CONTROL is set to denylist:
## SHELL_DENYLIST - List of shell commands that ARE NOT allowed to be executed by AutoGPT (Default: sudo,su)
# SHELL_DENYLIST=sudo,su
## ONLY if SHELL_COMMAND_CONTROL is set to allowlist:
## SHELL_ALLOWLIST - List of shell commands that ARE allowed to be executed by AutoGPT (Default: None)
# SHELL_ALLOWLIST=
################################################################################
### IMAGE GENERATION PROVIDER
################################################################################
### Common
## IMAGE_PROVIDER - Image provider (Default: dalle)
# IMAGE_PROVIDER=dalle
## IMAGE_SIZE - Image size (Default: 256)
# IMAGE_SIZE=256
### Huggingface (IMAGE_PROVIDER=huggingface)
## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4)
# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4
## HUGGINGFACE_API_TOKEN - HuggingFace API token (Default: None)
# HUGGINGFACE_API_TOKEN=
@@ -136,19 +107,6 @@
## SD_WEBUI_AUTH - Stable Diffusion Web UI username:password pair (Default: None)
# SD_WEBUI_AUTH=
## SD_WEBUI_URL - Stable Diffusion Web UI API URL (Default: http://localhost:7860)
# SD_WEBUI_URL=http://localhost:7860
################################################################################
### AUDIO TO TEXT PROVIDER
################################################################################
## AUDIO_TO_TEXT_PROVIDER - Audio-to-text provider (Default: huggingface)
# AUDIO_TO_TEXT_PROVIDER=huggingface
## HUGGINGFACE_AUDIO_TO_TEXT_MODEL - The model for HuggingFace to use (Default: CompVis/stable-diffusion-v1-4)
# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=CompVis/stable-diffusion-v1-4
################################################################################
### GITHUB
################################################################################
@@ -163,18 +121,6 @@
### WEB BROWSING
################################################################################
## HEADLESS_BROWSER - Whether to run the browser in headless mode (default: True)
# HEADLESS_BROWSER=True
## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome)
# USE_WEB_BROWSER=chrome
## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (Default: 3000)
# BROWSE_CHUNK_MAX_LENGTH=3000
## BROWSE_SPACY_LANGUAGE_MODEL - [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. (Default: en_core_web_sm)
# BROWSE_SPACY_LANGUAGE_MODEL=en_core_web_sm
## GOOGLE_API_KEY - Google API key (Default: None)
# GOOGLE_API_KEY=
@@ -198,13 +144,6 @@
## ELEVENLABS_VOICE_ID - Eleven Labs voice ID (Example: None)
# ELEVENLABS_VOICE_ID=
################################################################################
### CHAT MESSAGES
################################################################################
## CHAT_MESSAGES_ENABLED - Enable chat messages (Default: False)
# CHAT_MESSAGES_ENABLED=False
################################################################################
### LOGGING
################################################################################

View File

@@ -68,10 +68,6 @@ Options:
continuous mode
--speak Enable Speak Mode
--debug Enable Debug Mode
-b, --browser-name TEXT Specifies which web-browser to use when
using selenium to scrape the web.
--allow-downloads Dangerous: Allows AutoGPT to download files
natively.
--skip-news Specifies whether to suppress the output of
latest news on startup.
--install-plugin-deps Installs external dependencies for 3rd party
@@ -90,6 +86,7 @@ Options:
--override-directives If specified, --constraint, --resource and
--best-practice will override the AI's
directives instead of being appended to them
--component-config-file TEXT Path to the json configuration file.
--help Show this message and exit.
```
</details>
@@ -111,10 +108,6 @@ Usage: python -m autogpt serve [OPTIONS]
Options:
--debug Enable Debug Mode
-b, --browser-name TEXT Specifies which web-browser to use when using
selenium to scrape the web.
--allow-downloads Dangerous: Allows AutoGPT to download files
natively.
--install-plugin-deps Installs external dependencies for 3rd party
plugins.
--help Show this message and exit.

View File

@@ -2,17 +2,17 @@ from typing import Optional
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.config.config import Config
from forge.file_storage.base import FileStorage
from forge.llm.providers import MultiProvider
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.app.config import AppConfig
def create_agent(
agent_id: str,
task: str,
app_config: Config,
app_config: AppConfig,
file_storage: FileStorage,
llm_provider: MultiProvider,
ai_profile: Optional[AIProfile] = None,
@@ -38,7 +38,7 @@ def create_agent(
def configure_agent_with_state(
state: AgentSettings,
app_config: Config,
app_config: AppConfig,
file_storage: FileStorage,
llm_provider: MultiProvider,
) -> Agent:
@@ -51,7 +51,7 @@ def configure_agent_with_state(
def _configure_agent(
app_config: Config,
app_config: AppConfig,
llm_provider: MultiProvider,
file_storage: FileStorage,
agent_id: str = "",
@@ -80,7 +80,7 @@ def _configure_agent(
settings=agent_state,
llm_provider=llm_provider,
file_storage=file_storage,
legacy_config=app_config,
app_config=app_config,
)
@@ -89,7 +89,7 @@ def create_agent_state(
task: str,
ai_profile: AIProfile,
directives: AIDirectives,
app_config: Config,
app_config: AppConfig,
) -> AgentSettings:
return AgentSettings(
agent_id=agent_id,

View File

@@ -6,7 +6,7 @@ from forge.file_storage.base import FileStorage
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from forge.config.config import Config
from autogpt.app.config import AppConfig
from forge.llm.providers import MultiProvider
from .configurators import _configure_agent
@@ -16,7 +16,7 @@ from .profile_generator import generate_agent_profile_for_task
async def generate_agent_for_task(
agent_id: str,
task: str,
app_config: Config,
app_config: AppConfig,
file_storage: FileStorage,
llm_provider: MultiProvider,
) -> Agent:

View File

@@ -3,7 +3,6 @@ import logging
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.config.config import Config
from forge.llm.prompting import ChatPrompt, LanguageModelClassification, PromptStrategy
from forge.llm.providers import MultiProvider
from forge.llm.providers.schema import (
@@ -14,6 +13,8 @@ from forge.llm.providers.schema import (
from forge.models.config import SystemConfiguration, UserConfigurable
from forge.models.json_schema import JSONSchema
from autogpt.app.config import AppConfig
logger = logging.getLogger(__name__)
@@ -212,7 +213,7 @@ class AgentProfileGenerator(PromptStrategy):
async def generate_agent_profile_for_task(
task: str,
app_config: Config,
app_config: AppConfig,
llm_provider: MultiProvider,
) -> tuple[AIProfile, AIDirectives]:
"""Generates an AIConfig object from the given string.

View File

@@ -26,10 +26,10 @@ class MyAgent(Agent):
settings: AgentSettings,
llm_provider: MultiProvider
file_storage: FileStorage,
legacy_config: Config,
app_config: AppConfig,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
super().__init__(settings, llm_provider, file_storage, app_config)
# Add your custom component
self.my_component = MyComponent()
```

View File

@@ -18,7 +18,11 @@ from forge.components.action_history import (
ActionHistoryComponent,
EpisodicActionHistory,
)
from forge.components.code_executor.code_executor import CodeExecutorComponent
from forge.components.action_history.action_history import ActionHistoryConfiguration
from forge.components.code_executor.code_executor import (
CodeExecutorComponent,
CodeExecutorConfiguration,
)
from forge.components.context.context import AgentContext, ContextComponent
from forge.components.file_manager import FileManagerComponent
from forge.components.git_operations import GitOperationsComponent
@@ -58,7 +62,7 @@ from .prompt_strategies.one_shot import (
)
if TYPE_CHECKING:
from forge.config.config import Config
from autogpt.app.config import AppConfig
logger = logging.getLogger(__name__)
@@ -91,7 +95,7 @@ class Agent(BaseAgent[OneShotAgentActionProposal], Configurable[AgentSettings]):
settings: AgentSettings,
llm_provider: MultiProvider,
file_storage: FileStorage,
legacy_config: Config,
app_config: AppConfig,
):
super().__init__(settings)
@@ -109,31 +113,35 @@ class Agent(BaseAgent[OneShotAgentActionProposal], Configurable[AgentSettings]):
self.system = SystemComponent()
self.history = ActionHistoryComponent(
settings.history,
self.send_token_limit,
lambda x: self.llm_provider.count_tokens(x, self.llm.name),
legacy_config,
llm_provider,
ActionHistoryConfiguration(
model_name=app_config.fast_llm, max_tokens=self.send_token_limit
),
).run_after(WatchdogComponent)
self.user_interaction = UserInteractionComponent(legacy_config)
self.file_manager = FileManagerComponent(settings, file_storage)
if not app_config.noninteractive_mode:
self.user_interaction = UserInteractionComponent()
self.file_manager = FileManagerComponent(file_storage, settings)
self.code_executor = CodeExecutorComponent(
self.file_manager.workspace,
settings,
legacy_config,
CodeExecutorConfiguration(
docker_container_name=f"{settings.agent_id}_sandbox"
),
)
self.git_ops = GitOperationsComponent(legacy_config)
self.image_gen = ImageGeneratorComponent(
self.file_manager.workspace, legacy_config
self.git_ops = GitOperationsComponent()
self.image_gen = ImageGeneratorComponent(self.file_manager.workspace)
self.web_search = WebSearchComponent()
self.web_selenium = WebSeleniumComponent(
llm_provider,
app_config.app_data_dir,
)
self.web_search = WebSearchComponent(legacy_config)
self.web_selenium = WebSeleniumComponent(legacy_config, llm_provider, self.llm)
self.context = ContextComponent(self.file_manager.workspace, settings.context)
self.watchdog = WatchdogComponent(settings.config, settings.history).run_after(
ContextComponent
)
self.event_history = settings.history
self.legacy_config = legacy_config
self.app_config = app_config
async def propose_action(self) -> OneShotAgentActionProposal:
"""Proposes the next action to execute, based on the task and current state.
@@ -166,7 +174,7 @@ class Agent(BaseAgent[OneShotAgentActionProposal], Configurable[AgentSettings]):
ai_profile=self.state.ai_profile,
ai_directives=directives,
commands=function_specs_from_commands(self.commands),
include_os_info=self.legacy_config.execute_local_commands,
include_os_info=self.code_executor.config.execute_local_commands,
)
logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}")
@@ -277,7 +285,7 @@ class Agent(BaseAgent[OneShotAgentActionProposal], Configurable[AgentSettings]):
command
for command in self.commands
if not any(
name in self.legacy_config.disabled_commands for name in command.names
name in self.app_config.disabled_commands for name in command.names
)
]

View File

@@ -23,7 +23,6 @@ from forge.agent_protocol.models import (
TaskRequestBody,
TaskStepsListResponse,
)
from forge.config.config import Config
from forge.file_storage import FileStorage
from forge.llm.providers import ModelProviderBudget, MultiProvider
from forge.models.action import ActionErrorResult, ActionSuccessResult
@@ -35,6 +34,7 @@ from sentry_sdk import set_user
from autogpt.agent_factory.configurators import configure_agent_with_state, create_agent
from autogpt.agents.agent_manager import AgentManager
from autogpt.app.config import AppConfig
from autogpt.app.utils import is_port_free
logger = logging.getLogger(__name__)
@@ -45,7 +45,7 @@ class AgentProtocolServer:
def __init__(
self,
app_config: Config,
app_config: AppConfig,
database: AgentDB,
file_storage: FileStorage,
llm_provider: MultiProvider,

View File

@@ -28,24 +28,6 @@ def cli(ctx: click.Context):
help="Defines the number of times to run in continuous mode",
)
@click.option("--speak", is_flag=True, help="Enable Speak Mode")
@click.option(
"-b",
"--browser-name",
help="Specifies which web-browser to use when using selenium to scrape the web.",
)
@click.option(
"--allow-downloads",
is_flag=True,
help="Dangerous: Allows AutoGPT to download files natively.",
)
@click.option(
# TODO: this is a hidden option for now, necessary for integration testing.
# We should make this public once we're ready to roll out agent specific workspaces.
"--workspace-directory",
"-w",
type=click.Path(file_okay=False),
hidden=True,
)
@click.option(
"--install-plugin-deps",
is_flag=True,
@@ -128,13 +110,15 @@ def cli(ctx: click.Context):
),
type=click.Choice([i.value for i in LogFormatName]),
)
@click.option(
"--component-config-file",
help="Path to a json configuration file",
type=click.Path(exists=True, dir_okay=False, resolve_path=True),
)
def run(
continuous: bool,
continuous_limit: Optional[int],
speak: bool,
browser_name: Optional[str],
allow_downloads: bool,
workspace_directory: Optional[Path],
install_plugin_deps: bool,
skip_news: bool,
skip_reprompt: bool,
@@ -148,6 +132,7 @@ def run(
log_level: Optional[str],
log_format: Optional[str],
log_file_format: Optional[str],
component_config_file: Optional[Path],
) -> None:
"""
Sets up and runs an agent, based on the task specified by the user, or resumes an
@@ -165,10 +150,7 @@ def run(
log_level=log_level,
log_format=log_format,
log_file_format=log_file_format,
browser_name=browser_name,
allow_downloads=allow_downloads,
skip_news=skip_news,
workspace_directory=workspace_directory,
install_plugin_deps=install_plugin_deps,
override_ai_name=ai_name,
override_ai_role=ai_role,
@@ -176,20 +158,11 @@ def run(
constraints=list(constraint),
best_practices=list(best_practice),
override_directives=override_directives,
component_config_file=component_config_file,
)
@cli.command()
@click.option(
"-b",
"--browser-name",
help="Specifies which web-browser to use when using selenium to scrape the web.",
)
@click.option(
"--allow-downloads",
is_flag=True,
help="Dangerous: Allows AutoGPT to download files natively.",
)
@click.option(
"--install-plugin-deps",
is_flag=True,
@@ -217,8 +190,6 @@ def run(
type=click.Choice([i.value for i in LogFormatName]),
)
def serve(
browser_name: Optional[str],
allow_downloads: bool,
install_plugin_deps: bool,
debug: bool,
log_level: Optional[str],
@@ -237,8 +208,6 @@ def serve(
log_level=log_level,
log_format=log_format,
log_file_format=log_file_format,
browser_name=browser_name,
allow_downloads=allow_downloads,
install_plugin_deps=install_plugin_deps,
)

View File

@@ -7,15 +7,13 @@ import re
from pathlib import Path
from typing import Any, Optional, Union
from pydantic import SecretStr, validator
import forge
from forge.file_storage import FileStorageBackendName
from forge.config.base import BaseConfig
from forge.llm.providers import CHAT_MODELS, ModelName
from forge.llm.providers.openai import OpenAICredentials, OpenAIModelName
from forge.logging.config import LoggingConfig
from forge.models.config import Configurable, SystemSettings, UserConfigurable
from forge.speech.say import TTSConfig
from forge.models.config import Configurable, UserConfigurable
from pydantic import SecretStr, validator
logger = logging.getLogger(__name__)
@@ -26,7 +24,7 @@ GPT_4_MODEL = OpenAIModelName.GPT4
GPT_3_MODEL = OpenAIModelName.GPT3
class Config(SystemSettings, arbitrary_types_allowed=True):
class AppConfig(BaseConfig):
name: str = "Auto-GPT configuration"
description: str = "Default configuration for the Auto-GPT application."
@@ -40,14 +38,9 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
authorise_key: str = UserConfigurable(default="y", from_env="AUTHORISE_COMMAND_KEY")
exit_key: str = UserConfigurable(default="n", from_env="EXIT_KEY")
noninteractive_mode: bool = False
# TTS configuration
logging: LoggingConfig = LoggingConfig()
tts_config: TTSConfig = TTSConfig()
# File storage
file_storage_backend: FileStorageBackendName = UserConfigurable(
default=FileStorageBackendName.LOCAL, from_env="FILE_STORAGE_BACKEND"
component_config_file: Optional[Path] = UserConfigurable(
default=None, from_env="COMPONENT_CONFIG_FILE"
)
##########################
@@ -69,9 +62,6 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
embedding_model: str = UserConfigurable(
default="text-embedding-3-small", from_env="EMBEDDING_MODEL"
)
browse_spacy_language_model: str = UserConfigurable(
default="en_core_web_sm", from_env="BROWSE_SPACY_LANGUAGE_MODEL"
)
# Run loop configuration
continuous_mode: bool = False
@@ -91,56 +81,6 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
default=True,
from_env=lambda: os.getenv("RESTRICT_TO_WORKSPACE", "True") == "True",
)
allow_downloads: bool = False
# Shell commands
shell_command_control: str = UserConfigurable(
default="denylist", from_env="SHELL_COMMAND_CONTROL"
)
execute_local_commands: bool = UserConfigurable(
default=False,
from_env=lambda: os.getenv("EXECUTE_LOCAL_COMMANDS", "False") == "True",
)
shell_denylist: list[str] = UserConfigurable(
default_factory=lambda: ["sudo", "su"],
from_env=lambda: _safe_split(
os.getenv("SHELL_DENYLIST", os.getenv("DENY_COMMANDS"))
),
)
shell_allowlist: list[str] = UserConfigurable(
default_factory=list,
from_env=lambda: _safe_split(
os.getenv("SHELL_ALLOWLIST", os.getenv("ALLOW_COMMANDS"))
),
)
# Text to image
image_provider: Optional[str] = UserConfigurable(from_env="IMAGE_PROVIDER")
huggingface_image_model: str = UserConfigurable(
default="CompVis/stable-diffusion-v1-4", from_env="HUGGINGFACE_IMAGE_MODEL"
)
sd_webui_url: Optional[str] = UserConfigurable(
default="http://localhost:7860", from_env="SD_WEBUI_URL"
)
image_size: int = UserConfigurable(default=256, from_env="IMAGE_SIZE")
# Audio to text
audio_to_text_provider: str = UserConfigurable(
default="huggingface", from_env="AUDIO_TO_TEXT_PROVIDER"
)
huggingface_audio_to_text_model: Optional[str] = UserConfigurable(
from_env="HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
)
# Web browsing
selenium_web_browser: str = UserConfigurable("chrome", from_env="USE_WEB_BROWSER")
selenium_headless: bool = UserConfigurable(
default=True, from_env=lambda: os.getenv("HEADLESS_BROWSER", "True") == "True"
)
user_agent: str = UserConfigurable(
default="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", # noqa: E501
from_env="USER_AGENT",
)
###############
# Credentials #
@@ -151,24 +91,6 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
default=AZURE_CONFIG_FILE, from_env="AZURE_CONFIG_FILE"
)
# Github
github_api_key: Optional[str] = UserConfigurable(from_env="GITHUB_API_KEY")
github_username: Optional[str] = UserConfigurable(from_env="GITHUB_USERNAME")
# Google
google_api_key: Optional[str] = UserConfigurable(from_env="GOOGLE_API_KEY")
google_custom_search_engine_id: Optional[str] = UserConfigurable(
from_env="GOOGLE_CUSTOM_SEARCH_ENGINE_ID",
)
# Huggingface
huggingface_api_token: Optional[str] = UserConfigurable(
from_env="HUGGINGFACE_API_TOKEN"
)
# Stable Diffusion
sd_webui_auth: Optional[str] = UserConfigurable(from_env="SD_WEBUI_AUTH")
@validator("openai_functions")
def validate_openai_functions(cls, v: bool, values: dict[str, Any]):
if v:
@@ -180,11 +102,11 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
return v
class ConfigBuilder(Configurable[Config]):
default_settings = Config()
class ConfigBuilder(Configurable[AppConfig]):
default_settings = AppConfig()
@classmethod
def build_config_from_env(cls, project_root: Path = PROJECT_ROOT) -> Config:
def build_config_from_env(cls, project_root: Path = PROJECT_ROOT) -> AppConfig:
"""Initialize the Config class"""
config = cls.build_agent_configuration()
@@ -206,14 +128,13 @@ class ConfigBuilder(Configurable[Config]):
return config
async def assert_config_has_required_llm_api_keys(config: Config) -> None:
async def assert_config_has_required_llm_api_keys(config: AppConfig) -> None:
"""
Check if API keys (if required) are set for the configured SMART_LLM and FAST_LLM.
"""
from pydantic import ValidationError
from forge.llm.providers.anthropic import AnthropicModelName
from forge.llm.providers.groq import GroqModelName
from pydantic import ValidationError
if set((config.smart_llm, config.fast_llm)).intersection(AnthropicModelName):
from forge.llm.providers.anthropic import AnthropicCredentials
@@ -243,9 +164,8 @@ async def assert_config_has_required_llm_api_keys(config: Config) -> None:
)
if set((config.smart_llm, config.fast_llm)).intersection(GroqModelName):
from groq import AuthenticationError
from forge.llm.providers.groq import GroqProvider
from groq import AuthenticationError
try:
groq = GroqProvider()
@@ -268,9 +188,8 @@ async def assert_config_has_required_llm_api_keys(config: Config) -> None:
raise ValueError("Groq is unavailable: invalid API key") from e
if set((config.smart_llm, config.fast_llm)).intersection(OpenAIModelName):
from openai import AuthenticationError
from forge.llm.providers.openai import OpenAIProvider
from openai import AuthenticationError
try:
openai = OpenAIProvider()

View File

@@ -5,20 +5,18 @@ import logging
from typing import Literal, Optional
import click
from colorama import Back, Style
from forge.config.config import GPT_3_MODEL, Config
from forge.llm.providers import ModelName, MultiProvider
from autogpt.app.config import GPT_3_MODEL, AppConfig
logger = logging.getLogger(__name__)
async def apply_overrides_to_config(
config: Config,
config: AppConfig,
continuous: bool = False,
continuous_limit: Optional[int] = None,
skip_reprompt: bool = False,
browser_name: Optional[str] = None,
allow_downloads: bool = False,
skip_news: bool = False,
) -> None:
"""Updates the config object with the given arguments.
@@ -33,8 +31,6 @@ async def apply_overrides_to_config(
log_level (int): The global log level for the application.
log_format (str): The format for the log(s).
log_file_format (str): Override the format for the log file.
browser_name (str): The name of the browser to use for scraping the web.
allow_downloads (bool): Whether to allow AutoGPT to download files natively.
skips_news (bool): Whether to suppress the output of latest news on startup.
"""
config.continuous_mode = False
@@ -61,23 +57,6 @@ async def apply_overrides_to_config(
if skip_reprompt:
config.skip_reprompt = True
if browser_name:
config.selenium_web_browser = browser_name
if allow_downloads:
logger.warning(
msg=f"{Back.LIGHTYELLOW_EX}"
"AutoGPT will now be able to download and save files to your machine."
f"{Back.RESET}"
" It is recommended that you monitor any files it downloads carefully.",
)
logger.warning(
msg=f"{Back.RED + Style.BRIGHT}"
"NEVER OPEN FILES YOU AREN'T SURE OF!"
f"{Style.RESET_ALL}",
)
config.allow_downloads = True
if skip_news:
config.skip_news = True

View File

@@ -21,11 +21,6 @@ from forge.components.code_executor.code_executor import (
)
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.config.config import (
Config,
ConfigBuilder,
assert_config_has_required_llm_api_keys,
)
from forge.file_storage import FileStorageBackendName, get_storage
from forge.llm.providers import MultiProvider
from forge.logging.config import configure_logging
@@ -38,6 +33,11 @@ from forge.utils.exceptions import AgentTerminated, InvalidAgentResponseError
from autogpt.agent_factory.configurators import configure_agent_with_state, create_agent
from autogpt.agents.agent_manager import AgentManager
from autogpt.agents.prompt_strategies.one_shot import AssistantThoughts
from autogpt.app.config import (
AppConfig,
ConfigBuilder,
assert_config_has_required_llm_api_keys,
)
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
@@ -66,10 +66,7 @@ async def run_auto_gpt(
log_level: Optional[str] = None,
log_format: Optional[str] = None,
log_file_format: Optional[str] = None,
browser_name: Optional[str] = None,
allow_downloads: bool = False,
skip_news: bool = False,
workspace_directory: Optional[Path] = None,
install_plugin_deps: bool = False,
override_ai_name: Optional[str] = None,
override_ai_role: Optional[str] = None,
@@ -77,6 +74,7 @@ async def run_auto_gpt(
constraints: Optional[list[str]] = None,
best_practices: Optional[list[str]] = None,
override_directives: bool = False,
component_config_file: Optional[Path] = None,
):
# Set up configuration
config = ConfigBuilder.build_config_from_env()
@@ -109,8 +107,6 @@ async def run_auto_gpt(
continuous=continuous,
continuous_limit=continuous_limit,
skip_reprompt=skip_reprompt,
browser_name=browser_name,
allow_downloads=allow_downloads,
skip_news=skip_news,
)
@@ -135,15 +131,12 @@ async def run_auto_gpt(
print_python_version_info(logger)
print_attribute("Smart LLM", config.smart_llm)
print_attribute("Fast LLM", config.fast_llm)
print_attribute("Browser", config.selenium_web_browser)
if config.continuous_mode:
print_attribute("Continuous Mode", "ENABLED", title_color=Fore.YELLOW)
if continuous_limit:
print_attribute("Continuous Limit", config.continuous_limit)
if config.tts_config.speak_mode:
print_attribute("Speak Mode", "ENABLED")
if config.allow_downloads:
print_attribute("Native Downloading", "ENABLED")
if we_are_running_in_a_docker_container() or is_docker_available():
print_attribute("Code Execution", "ENABLED")
else:
@@ -330,6 +323,14 @@ async def run_auto_gpt(
# )
# ).add_done_callback(update_agent_directives)
# Load component configuration from file
if _config_file := component_config_file or config.component_config_file:
try:
logger.info(f"Loading component configuration from {_config_file}")
agent.load_component_configs(_config_file.read_text())
except Exception as e:
logger.error(f"Could not load component configuration: {e}")
#################
# Run the Agent #
#################
@@ -356,8 +357,6 @@ async def run_auto_gpt_server(
log_level: Optional[str] = None,
log_format: Optional[str] = None,
log_file_format: Optional[str] = None,
browser_name: Optional[str] = None,
allow_downloads: bool = False,
install_plugin_deps: bool = False,
):
from .agent_protocol_server import AgentProtocolServer
@@ -387,8 +386,6 @@ async def run_auto_gpt_server(
await apply_overrides_to_config(
config=config,
browser_name=browser_name,
allow_downloads=allow_downloads,
)
llm_provider = _configure_llm_provider(config)
@@ -413,7 +410,7 @@ async def run_auto_gpt_server(
)
def _configure_llm_provider(config: Config) -> MultiProvider:
def _configure_llm_provider(config: AppConfig) -> MultiProvider:
multi_provider = MultiProvider()
for model in [config.smart_llm, config.fast_llm]:
# Ensure model providers for configured LLMs are available
@@ -453,15 +450,15 @@ async def run_interaction_loop(
None
"""
# These contain both application config and agent config, so grab them here.
legacy_config = agent.legacy_config
app_config = agent.app_config
ai_profile = agent.state.ai_profile
logger = logging.getLogger(__name__)
cycle_budget = cycles_remaining = _get_cycle_budget(
legacy_config.continuous_mode, legacy_config.continuous_limit
app_config.continuous_mode, app_config.continuous_limit
)
spinner = Spinner(
"Thinking...", plain_output=legacy_config.logging.plain_console_output
"Thinking...", plain_output=app_config.logging.plain_console_output
)
stop_reason = None
@@ -539,7 +536,7 @@ async def run_interaction_loop(
update_user(
ai_profile,
action_proposal,
speak_mode=legacy_config.tts_config.speak_mode,
speak_mode=app_config.tts_config.speak_mode,
)
##################
@@ -548,7 +545,7 @@ async def run_interaction_loop(
handle_stop_signal()
if cycles_remaining == 1: # Last cycle
feedback_type, feedback, new_cycles_remaining = await get_user_feedback(
legacy_config,
app_config,
ai_profile,
)
@@ -659,7 +656,7 @@ def update_user(
async def get_user_feedback(
config: Config,
config: AppConfig,
ai_profile: AIProfile,
) -> tuple[UserFeedback, str, int | None]:
"""Gets the user's feedback on the assistant's reply.

View File

@@ -4,9 +4,10 @@ from typing import Optional
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.config.config import Config
from forge.logging.utils import print_attribute
from autogpt.app.config import AppConfig
from .input import clean_input
logger = logging.getLogger(__name__)
@@ -46,7 +47,7 @@ def apply_overrides_to_ai_settings(
async def interactively_revise_ai_settings(
ai_profile: AIProfile,
directives: AIDirectives,
app_config: Config,
app_config: AppConfig,
):
"""Interactively revise the AI settings.

View File

@@ -6,7 +6,6 @@ from pathlib import Path
import pytest
from forge.config.ai_profile import AIProfile
from forge.config.config import Config, ConfigBuilder
from forge.file_storage.local import (
FileStorage,
FileStorageConfiguration,
@@ -16,6 +15,7 @@ from forge.llm.providers import MultiProvider
from forge.logging.config import configure_logging
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.app.config import AppConfig, ConfigBuilder
from autogpt.app.main import _configure_llm_provider
pytest_plugins = [
@@ -62,7 +62,7 @@ def config(
@pytest.fixture(scope="session")
def setup_logger(config: Config):
def setup_logger():
configure_logging(
debug=True,
log_dir=Path(__file__).parent / "logs",
@@ -71,12 +71,14 @@ def setup_logger(config: Config):
@pytest.fixture
def llm_provider(config: Config) -> MultiProvider:
def llm_provider(config: AppConfig) -> MultiProvider:
return _configure_llm_provider(config)
@pytest.fixture
def agent(config: Config, llm_provider: MultiProvider, storage: FileStorage) -> Agent:
def agent(
config: AppConfig, llm_provider: MultiProvider, storage: FileStorage
) -> Agent:
ai_profile = AIProfile(
ai_name="Base",
ai_role="A base AI",
@@ -101,6 +103,6 @@ def agent(config: Config, llm_provider: MultiProvider, storage: FileStorage) ->
settings=agent_settings,
llm_provider=llm_provider,
file_storage=storage,
legacy_config=config,
app_config=config,
)
return agent

View File

@@ -2,15 +2,15 @@ from pathlib import Path
import pytest
from forge.config.ai_profile import AIProfile
from forge.config.config import Config
from forge.file_storage import FileStorageBackendName, get_storage
from forge.llm.providers import MultiProvider
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.app.config import AppConfig
@pytest.fixture
def dummy_agent(config: Config, llm_provider: MultiProvider):
def dummy_agent(config: AppConfig, llm_provider: MultiProvider):
ai_profile = AIProfile(
ai_name="Dummy Agent",
ai_role="Dummy Role",
@@ -44,7 +44,7 @@ def dummy_agent(config: Config, llm_provider: MultiProvider):
settings=agent_settings,
llm_provider=llm_provider,
file_storage=file_storage,
legacy_config=config,
app_config=config,
)
return agent

View File

@@ -5,19 +5,17 @@ from pathlib import Path
import pytest
from forge.components.code_executor.code_executor import (
ALLOWLIST_CONTROL,
CodeExecutorComponent,
is_docker_available,
we_are_running_in_a_docker_container,
)
from forge.file_storage.base import FileStorage
from forge.utils.exceptions import InvalidArgumentError, OperationNotAllowedError
from autogpt.agents.agent import Agent
@pytest.fixture
def code_executor_component(agent: Agent):
return agent.code_executor
def code_executor_component(storage: FileStorage):
return CodeExecutorComponent(storage)
@pytest.fixture
@@ -26,10 +24,8 @@ def random_code(random_string) -> str:
@pytest.fixture
def python_test_file(agent: Agent, random_code: str):
temp_file = tempfile.NamedTemporaryFile(
dir=agent.file_manager.workspace.root, suffix=".py"
)
def python_test_file(storage: FileStorage, random_code: str):
temp_file = tempfile.NamedTemporaryFile(dir=storage.root, suffix=".py")
temp_file.write(str.encode(random_code))
temp_file.flush()
@@ -38,10 +34,8 @@ def python_test_file(agent: Agent, random_code: str):
@pytest.fixture
def python_test_args_file(agent: Agent):
temp_file = tempfile.NamedTemporaryFile(
dir=agent.file_manager.workspace.root, suffix=".py"
)
def python_test_args_file(storage: FileStorage):
temp_file = tempfile.NamedTemporaryFile(dir=storage.root, suffix=".py")
temp_file.write(str.encode("import sys\nprint(sys.argv[1], sys.argv[2])"))
temp_file.flush()
@@ -58,7 +52,6 @@ def test_execute_python_file(
code_executor_component: CodeExecutorComponent,
python_test_file: Path,
random_string: str,
agent: Agent,
):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
@@ -71,7 +64,6 @@ def test_execute_python_file_args(
code_executor_component: CodeExecutorComponent,
python_test_args_file: Path,
random_string: str,
agent: Agent,
):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
@@ -89,7 +81,6 @@ async def test_execute_python_code(
code_executor_component: CodeExecutorComponent,
random_code: str,
random_string: str,
agent: Agent,
):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
@@ -98,16 +89,12 @@ async def test_execute_python_code(
assert result.replace("\r", "") == f"Hello {random_string}!\n"
def test_execute_python_file_invalid(
code_executor_component: CodeExecutorComponent, agent: Agent
):
def test_execute_python_file_invalid(code_executor_component: CodeExecutorComponent):
with pytest.raises(InvalidArgumentError):
code_executor_component.execute_python_file(Path("not_python.txt"))
def test_execute_python_file_not_found(
code_executor_component: CodeExecutorComponent, agent: Agent
):
def test_execute_python_file_not_found(code_executor_component: CodeExecutorComponent):
with pytest.raises(
FileNotFoundError,
match=r"python: can't open file '([a-zA-Z]:)?[/\\\-\w]*notexist.py': "
@@ -116,53 +103,41 @@ def test_execute_python_file_not_found(
code_executor_component.execute_python_file(Path("notexist.py"))
def test_execute_shell(
code_executor_component: CodeExecutorComponent, random_string: str, agent: Agent
):
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert f"Hello {random_string}!" in result
def test_execute_shell_local_commands_not_allowed(
code_executor_component: CodeExecutorComponent, random_string: str, agent: Agent
):
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert f"Hello {random_string}!" in result
def test_execute_shell_denylist_should_deny(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
code_executor_component: CodeExecutorComponent, random_string: str
):
agent.legacy_config.shell_denylist = ["echo"]
code_executor_component.config.shell_command_control = "denylist"
code_executor_component.config.shell_denylist = ["echo"]
with pytest.raises(OperationNotAllowedError, match="not allowed"):
code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
def test_execute_shell_denylist_should_allow(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
code_executor_component: CodeExecutorComponent, random_string: str
):
agent.legacy_config.shell_denylist = ["cat"]
code_executor_component.config.shell_command_control = "denylist"
code_executor_component.config.shell_denylist = ["cat"]
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert "Hello" in result and random_string in result
def test_execute_shell_allowlist_should_deny(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
code_executor_component: CodeExecutorComponent, random_string: str
):
agent.legacy_config.shell_command_control = ALLOWLIST_CONTROL
agent.legacy_config.shell_allowlist = ["cat"]
code_executor_component.config.shell_command_control = "allowlist"
code_executor_component.config.shell_allowlist = ["cat"]
with pytest.raises(OperationNotAllowedError, match="not allowed"):
code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
def test_execute_shell_allowlist_should_allow(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
code_executor_component: CodeExecutorComponent, random_string: str
):
agent.legacy_config.shell_command_control = ALLOWLIST_CONTROL
agent.legacy_config.shell_allowlist = ["echo"]
code_executor_component.config.shell_command_control = "allowlist"
code_executor_component.config.shell_allowlist = ["echo"]
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert "Hello" in result and random_string in result

View File

@@ -5,14 +5,27 @@ from unittest.mock import patch
import pytest
from forge.components.image_gen import ImageGeneratorComponent
from forge.components.image_gen.image_gen import ImageGeneratorConfiguration
from forge.file_storage.base import FileStorage
from forge.llm.providers.openai import OpenAICredentials
from PIL import Image
from autogpt.agents.agent import Agent
from pydantic import SecretStr
@pytest.fixture
def image_gen_component(agent: Agent):
return agent.image_gen
def image_gen_component(storage: FileStorage):
cred = OpenAICredentials.from_env()
return ImageGeneratorComponent(storage, openai_credentials=cred)
@pytest.fixture
def huggingface_image_gen_component(storage: FileStorage):
config = ImageGeneratorConfiguration(
image_provider="huggingface",
huggingface_api_token=SecretStr("1"),
huggingface_image_model="CompVis/stable-diffusion-v1-4",
)
return ImageGeneratorComponent(storage, config=config)
@pytest.fixture(params=[256, 512, 1024])
@@ -25,16 +38,11 @@ def image_size(request):
@pytest.mark.vcr
def test_dalle(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
cached_openai_client,
):
"""Test DALL-E image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="dalle",
image_size=image_size,
)
@@ -51,16 +59,12 @@ def test_dalle(
)
def test_huggingface(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
image_model,
):
"""Test HuggingFace image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="huggingface",
image_size=image_size,
hugging_face_image_model=image_model,
@@ -68,14 +72,10 @@ def test_huggingface(
@pytest.mark.xfail(reason="SD WebUI call does not work.")
def test_sd_webui(
image_gen_component: ImageGeneratorComponent, agent: Agent, storage, image_size
):
def test_sd_webui(image_gen_component: ImageGeneratorComponent, image_size):
"""Test SD WebUI image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="sd_webui",
image_size=image_size,
)
@@ -83,7 +83,7 @@ def test_sd_webui(
@pytest.mark.xfail(reason="SD WebUI call does not work.")
def test_sd_webui_negative_prompt(
image_gen_component: ImageGeneratorComponent, storage, image_size
image_gen_component: ImageGeneratorComponent, image_size
):
gen_image = functools.partial(
image_gen_component.generate_image_with_sd_webui,
@@ -114,17 +114,15 @@ def lst(txt):
def generate_and_validate(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
image_provider,
hugging_face_image_model=None,
**kwargs,
):
"""Generate an image and validate the output."""
agent.legacy_config.image_provider = image_provider
image_gen_component.config.image_provider = image_provider
if hugging_face_image_model:
agent.legacy_config.huggingface_image_model = hugging_face_image_model
image_gen_component.config.huggingface_image_model = hugging_face_image_model
prompt = "astronaut riding a horse"
image_path = lst(image_gen_component.generate_image(prompt, image_size, **kwargs))
@@ -149,9 +147,7 @@ def generate_and_validate(
)
@pytest.mark.parametrize("delay", [10, 0])
def test_huggingface_fail_request_with_delay(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
huggingface_image_gen_component: ImageGeneratorComponent,
image_size,
image_model,
return_text,
@@ -173,14 +169,12 @@ def test_huggingface_fail_request_with_delay(
mock_post.return_value.ok = False
mock_post.return_value.text = return_text
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_api_token = "mock-api-key"
agent.legacy_config.huggingface_image_model = image_model
huggingface_image_gen_component.config.huggingface_image_model = image_model
prompt = "astronaut riding a horse"
with patch("time.sleep") as mock_sleep:
# Verify request fails.
result = image_gen_component.generate_image(prompt, image_size)
result = huggingface_image_gen_component.generate_image(prompt, image_size)
assert result == "Error creating image."
# Verify retry was called with delay if delay is in return_text
@@ -191,10 +185,8 @@ def test_huggingface_fail_request_with_delay(
def test_huggingface_fail_request_no_delay(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
mocker, huggingface_image_gen_component: ImageGeneratorComponent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
mock_post = mocker.patch("requests.post")
mock_post.return_value.status_code = 500
@@ -206,10 +198,9 @@ def test_huggingface_fail_request_no_delay(
# Mock time.sleep
mock_sleep = mocker.patch("time.sleep")
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = image_gen_component.generate_image("astronaut riding a horse", 512)
result = huggingface_image_gen_component.generate_image(
"astronaut riding a horse", 512
)
assert result == "Error creating image."
@@ -218,10 +209,8 @@ def test_huggingface_fail_request_no_delay(
def test_huggingface_fail_request_bad_json(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
mocker, huggingface_image_gen_component: ImageGeneratorComponent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
mock_post = mocker.patch("requests.post")
mock_post.return_value.status_code = 500
@@ -231,10 +220,9 @@ def test_huggingface_fail_request_bad_json(
# Mock time.sleep
mock_sleep = mocker.patch("time.sleep")
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = image_gen_component.generate_image("astronaut riding a horse", 512)
result = huggingface_image_gen_component.generate_image(
"astronaut riding a horse", 512
)
assert result == "Error creating image."
@@ -243,17 +231,14 @@ def test_huggingface_fail_request_bad_json(
def test_huggingface_fail_request_bad_image(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
mocker, huggingface_image_gen_component: ImageGeneratorComponent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
mock_post = mocker.patch("requests.post")
mock_post.return_value.status_code = 200
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = image_gen_component.generate_image("astronaut riding a horse", 512)
result = huggingface_image_gen_component.generate_image(
"astronaut riding a horse", 512
)
assert result == "Error creating image."

View File

@@ -3,8 +3,8 @@ from unittest.mock import patch
import pytest
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.config.config import Config
from autogpt.app.config import AppConfig
from autogpt.app.setup import (
apply_overrides_to_ai_settings,
interactively_revise_ai_settings,
@@ -39,7 +39,7 @@ async def test_apply_overrides_to_ai_settings():
@pytest.mark.asyncio
async def test_interactively_revise_ai_settings(config: Config):
async def test_interactively_revise_ai_settings(config: AppConfig):
ai_profile = AIProfile(ai_name="Test AI", ai_role="Test Role")
directives = AIDirectives(
resources=["Resource1"],

View File

@@ -8,15 +8,15 @@ from typing import Any
from unittest import mock
import pytest
from forge.config.config import GPT_3_MODEL, GPT_4_MODEL, Config, ConfigBuilder
from openai.pagination import AsyncPage
from openai.types import Model
from pydantic import SecretStr
from autogpt.app.config import GPT_3_MODEL, GPT_4_MODEL, AppConfig, ConfigBuilder
from autogpt.app.configurator import apply_overrides_to_config
def test_initial_values(config: Config) -> None:
def test_initial_values(config: AppConfig) -> None:
"""
Test if the initial values of the config class attributes are set correctly.
"""
@@ -29,7 +29,7 @@ def test_initial_values(config: Config) -> None:
@pytest.mark.asyncio
@mock.patch("openai.resources.models.AsyncModels.list")
async def test_fallback_to_gpt3_if_gpt4_not_available(
mock_list_models: Any, config: Config
mock_list_models: Any, config: AppConfig
) -> None:
"""
Test if models update to gpt-3.5-turbo if gpt-4 is not available.
@@ -51,7 +51,7 @@ async def test_fallback_to_gpt3_if_gpt4_not_available(
assert config.smart_llm == GPT_3_MODEL
def test_missing_azure_config(config: Config) -> None:
def test_missing_azure_config(config: AppConfig) -> None:
assert config.openai_credentials is not None
config_file = config.app_data_dir / "azure_config.yaml"
@@ -68,7 +68,7 @@ def test_missing_azure_config(config: Config) -> None:
@pytest.fixture
def config_with_azure(config: Config):
def config_with_azure(config: AppConfig):
config_file = config.app_data_dir / "azure_config.yaml"
config_file.write_text(
f"""
@@ -91,7 +91,7 @@ azure_model_map:
del os.environ["AZURE_CONFIG_FILE"]
def test_azure_config(config_with_azure: Config) -> None:
def test_azure_config(config_with_azure: AppConfig) -> None:
assert (credentials := config_with_azure.openai_credentials) is not None
assert credentials.api_type == SecretStr("azure")
assert credentials.api_version == SecretStr("2023-06-01-preview")

View File

@@ -37,7 +37,7 @@ def test_clone_auto_gpt_repository(
assert clone_result == expected_output
mock_clone_from.assert_called_once_with(
url=f"{scheme}{agent.legacy_config.github_username}:{agent.legacy_config.github_api_key}@{repo}", # noqa: E501
url=f"{scheme}{git_ops_component.config.github_username}:{git_ops_component.config.github_api_key}@{repo}", # noqa: E501
to_path=clone_path,
)

View File

@@ -4,12 +4,15 @@ import pytest
from forge.components.web.search import WebSearchComponent
from forge.utils.exceptions import ConfigurationError
from googleapiclient.errors import HttpError
from pydantic import SecretStr
from autogpt.agents.agent import Agent
@pytest.fixture
def web_search_component(agent: Agent):
agent.web_search.config.google_api_key = SecretStr("test")
agent.web_search.config.google_custom_search_engine_id = SecretStr("test")
return agent.web_search

View File

@@ -1,63 +0,0 @@
# 🖼 Image Generation configuration
| Config variable | Values | |
| ---------------- | ------------------------------- | -------------------- |
| `IMAGE_PROVIDER` | `dalle` `huggingface` `sdwebui` | **default: `dalle`** |
## DALL-e
In `.env`, make sure `IMAGE_PROVIDER` is commented (or set to `dalle`):
```ini
# IMAGE_PROVIDER=dalle # this is the default
```
Further optional configuration:
| Config variable | Values | |
| ---------------- | ------------------ | -------------- |
| `IMAGE_SIZE` | `256` `512` `1024` | default: `256` |
## Hugging Face
To use text-to-image models from Hugging Face, you need a Hugging Face API token.
Link to the appropriate settings page: [Hugging Face > Settings > Tokens](https://huggingface.co/settings/tokens)
Once you have an API token, uncomment and adjust these variables in your `.env`:
```ini
IMAGE_PROVIDER=huggingface
HUGGINGFACE_API_TOKEN=your-huggingface-api-token
```
Further optional configuration:
| Config variable | Values | |
| ------------------------- | ---------------------- | ---------------------------------------- |
| `HUGGINGFACE_IMAGE_MODEL` | see [available models] | default: `CompVis/stable-diffusion-v1-4` |
[available models]: https://huggingface.co/models?pipeline_tag=text-to-image
## Stable Diffusion WebUI
It is possible to use your own self-hosted Stable Diffusion WebUI with AutoGPT:
```ini
IMAGE_PROVIDER=sdwebui
```
!!! note
Make sure you are running WebUI with `--api` enabled.
Further optional configuration:
| Config variable | Values | |
| --------------- | ----------------------- | -------------------------------- |
| `SD_WEBUI_URL` | URL to your WebUI | default: `http://127.0.0.1:7860` |
| `SD_WEBUI_AUTH` | `{username}:{password}` | *Note: do not copy the braces!* |
## Selenium
```shell
sudo Xvfb :10 -ac -screen 0 1024x768x24 & DISPLAY=:10 <YOUR_CLIENT>
```

View File

@@ -4,18 +4,14 @@ Configuration is controlled through the `Config` object. You can set configurati
## Environment Variables
- `AUDIO_TO_TEXT_PROVIDER`: Audio To Text Provider. Only option currently is `huggingface`. Default: huggingface
- `AUTHORISE_COMMAND_KEY`: Key response accepted when authorising commands. Default: y
- `ANTHROPIC_API_KEY`: Set this if you want to use Anthropic models with AutoGPT
- `AZURE_CONFIG_FILE`: Location of the Azure Config file relative to the AutoGPT root directory. Default: azure.yaml
- `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. Default: 3000
- `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm
- `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional
- `DISABLED_COMMANDS`: Commands to disable. Use comma separated names of commands. See the list of commands from built-in components [here](../../forge/components/built-in-components.md). Default: None
- `COMPONENT_CONFIG_FILE`: Path to the component configuration file (json) for an agent. Optional
- `DISABLED_COMMANDS`: Commands to disable. Use comma separated names of commands. See the list of commands from built-in components [here](../components/components.md). Default: None
- `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional.
- `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional.
- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: `text-embedding-3-small`
- `EXECUTE_LOCAL_COMMANDS`: If shell commands should be executed locally. Default: False
- `EXIT_KEY`: Exit key accepted to exit. Default: n
- `FAST_LLM`: LLM Model to use for most tasks. Default: `gpt-3.5-turbo-0125`
- `GITHUB_API_KEY`: [Github API Key](https://github.com/settings/tokens). Optional.
@@ -23,26 +19,16 @@ Configuration is controlled through the `Config` object. You can set configurati
- `GOOGLE_API_KEY`: Google API key. Optional.
- `GOOGLE_CUSTOM_SEARCH_ENGINE_ID`: [Google custom search engine ID](https://programmablesearchengine.google.com/controlpanel/all). Optional.
- `GROQ_API_KEY`: Set this if you want to use Groq models with AutoGPT
- `HEADLESS_BROWSER`: Use a headless browser while AutoGPT uses a web browser. Setting to `False` will allow you to see AutoGPT operate the browser. Default: True
- `HUGGINGFACE_API_TOKEN`: HuggingFace API, to be used for both image generation and audio to text. Optional.
- `HUGGINGFACE_AUDIO_TO_TEXT_MODEL`: HuggingFace audio to text model. Default: CompVis/stable-diffusion-v1-4
- `HUGGINGFACE_IMAGE_MODEL`: HuggingFace model to use for image generation. Default: CompVis/stable-diffusion-v1-4
- `IMAGE_PROVIDER`: Image provider. Options are `dalle`, `huggingface`, and `sdwebui`. Default: dalle
- `IMAGE_SIZE`: Default size of image to generate. Default: 256
- `OPENAI_API_KEY`: *REQUIRED*- Your [OpenAI API Key](https://platform.openai.com/account/api-keys).
- `OPENAI_ORGANIZATION`: Organization ID in OpenAI. Optional.
- `PLAIN_OUTPUT`: Plain output, which disables the spinner. Default: False
- `RESTRICT_TO_WORKSPACE`: Restrict file reading and writing to the workspace directory. Default: True
- `SD_WEBUI_AUTH`: Stable Diffusion Web UI username:password pair. Optional.
- `SD_WEBUI_URL`: Stable Diffusion Web UI URL. Default: http://localhost:7860
- `SHELL_ALLOWLIST`: List of shell commands that ARE allowed to be executed by AutoGPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `allowlist`. Default: None
- `SHELL_COMMAND_CONTROL`: Whether to use `allowlist` or `denylist` to determine what shell commands can be executed (Default: denylist)
- `SHELL_DENYLIST`: List of shell commands that ARE NOT allowed to be executed by AutoGPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `denylist`. Default: sudo,su
- `SMART_LLM`: LLM Model to use for "smart" tasks. Default: `gpt-4-turbo-preview`
- `STREAMELEMENTS_VOICE`: StreamElements voice to use. Default: Brian
- `TEMPERATURE`: Value of temperature given to OpenAI. Value from 0 to 2. Lower is more deterministic, higher is more random. See https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature
- `TEXT_TO_SPEECH_PROVIDER`: Text to Speech Provider. Options are `gtts`, `macos`, `elevenlabs`, and `streamelements`. Default: gtts
- `USER_AGENT`: User-Agent given when browsing websites. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
- `USE_AZURE`: Use Azure's LLM. Default: False
- `USE_WEB_BROWSER`: Which web browser to use. Options are `chrome`, `firefox`, `safari` or `edge` Default: chrome
- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True

View File

@@ -2,36 +2,36 @@
!!! note
This section is optional. Use the official Google API if search attempts return
error 429. To use the `google_official_search` command, you need to set up your
Google API key in your environment variables.
error 429. To use the `google` command, you need to set up your
Google API key in your environment variables or pass it with configuration to the [`WebSearchComponent`](../../forge/components/built-in-components.md).
Create your project:
1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
2. If you don't already have an account, create one and log in
3. Create a new project by clicking on the *Select a Project* dropdown at the top of the
1. If you don't already have an account, create one and log in
1. Create a new project by clicking on the *Select a Project* dropdown at the top of the
page and clicking *New Project*
4. Give it a name and click *Create*
5. Set up a custom search API and add to your .env file:
5. Go to the [APIs & Services Dashboard](https://console.cloud.google.com/apis/dashboard)
6. Click *Enable APIs and Services*
7. Search for *Custom Search API* and click on it
8. Click *Enable*
9. Go to the [Credentials](https://console.cloud.google.com/apis/credentials) page
10. Click *Create Credentials*
11. Choose *API Key*
12. Copy the API key
13. Set it as the `GOOGLE_API_KEY` in your `.env` file
14. [Enable](https://console.developers.google.com/apis/api/customsearch.googleapis.com)
1. Give it a name and click *Create*
1. Set up a custom search API and add to your .env file:
1. Go to the [APIs & Services Dashboard](https://console.cloud.google.com/apis/dashboard)
1. Click *Enable APIs and Services*
1. Search for *Custom Search API* and click on it
1. Click *Enable*
1. Go to the [Credentials](https://console.cloud.google.com/apis/credentials) page
1. Click *Create Credentials*
1. Choose *API Key*
1. Copy the API key
1. Set it as the `GOOGLE_API_KEY` in your `.env` file
1. [Enable](https://console.developers.google.com/apis/api/customsearch.googleapis.com)
the Custom Search API on your project. (You might need to wait a few minutes for the change to propagate.)
Set up a custom search engine and add to your .env file:
15. Go to the [Custom Search Engine](https://cse.google.com/cse/all) page
16. Click *Add*
17. Set up your search engine by following the prompts.
1. Go to the [Custom Search Engine](https://cse.google.com/cse/all) page
1. Click *Add*
1. Set up your search engine by following the prompts.
You can choose to search the entire web or specific sites
18. Once you've created your search engine, click on *Control Panel*
19. Click *Basics*
20. Copy the *Search engine ID*
21. Set it as the `CUSTOM_SEARCH_ENGINE_ID` in your `.env` file
1. Once you've created your search engine, click on *Control Panel*
1. Click *Basics*
1. Copy the *Search engine ID*
1. Set it as the `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` in your `.env` file
_Remember that your free daily custom search quota allows only up to 100 searches. To increase this limit, you need to assign a billing account to the project to profit from up to 10K daily searches._

View File

@@ -60,10 +60,6 @@ Options:
--debug Enable Debug Mode
--gpt3only Enable GPT3.5 Only Mode
--gpt4only Enable GPT4 Only Mode
-b, --browser-name TEXT Specifies which web-browser to use when
using selenium to scrape the web.
--allow-downloads Dangerous: Allows AutoGPT to download files
natively.
--skip-news Specifies whether to suppress the output of
latest news on startup.
--install-plugin-deps Installs external dependencies for 3rd party
@@ -82,6 +78,7 @@ Options:
--override-directives If specified, --constraint, --resource and
--best-practice will override the AI's
directives instead of being appended to them
--component-config-file TEXT Path to the json configuration file.
--help Show this message and exit.
```
</details>
@@ -128,10 +125,6 @@ Options:
--debug Enable Debug Mode
--gpt3only Enable GPT3.5 Only Mode
--gpt4only Enable GPT4 Only Mode
-b, --browser-name TEXT Specifies which web-browser to use when using
selenium to scrape the web.
--allow-downloads Dangerous: Allows AutoGPT to download files
natively.
--install-plugin-deps Installs external dependencies for 3rd party
plugins.
--help Show this message and exit.

View File

@@ -1,19 +1,26 @@
# Built-in Components
This page lists all [🧩 Components](./components.md) and [⚙️ Protocols](./protocols.md) they implement that are natively provided. They are used by the AutoGPT agent.
Some components have additional configuration options listed in the table, see [Component configuration](./components.md/#ordering-components) to learn more.
!!! note
If a configuration field uses an environment variable, its value can still be passed using the configuration model. **The value from the configuration takes precedence over the env var!** The env var is only applied if the value is not set in the configuration.
## `SystemComponent`
Essential component to allow an agent to finish.
**DirectiveProvider**
- Constraints about API budget
**MessageProvider**
- Current time and date
- Remaining API budget and warnings if budget is low
**CommandProvider**
- `finish` used when task is completed
## `UserInteractionComponent`
@@ -21,6 +28,7 @@ Essential component to allow an agent to finish.
Adds ability to interact with user in CLI.
**CommandProvider**
- `ask_user` used to ask user for input
## `FileManagerComponent`
@@ -28,10 +36,19 @@ Adds ability to interact with user in CLI.
Adds ability to read and write persistent files to local storage, Google Cloud Storage or Amazon's S3.
Necessary for saving and loading agent's state (preserving session).
| Config variable | Details | Type | Default |
| ---------------- | -------------------------------------- | ----- | ---------------------------------- |
| `files_path` | Path to agent files, e.g. state | `str` | `agents/{agent_id}/`[^1] |
| `workspace_path` | Path to files that agent has access to | `str` | `agents/{agent_id}/workspace/`[^1] |
[^1]: This option is set dynamically during component construction rather than as a default inside the configuration model; `{agent_id}` is replaced with the agent's unique identifier.
**DirectiveProvider**
- Resource information that it's possible to read and write files
**CommandProvider**
- `read_file` used to read file
- `write_file` used to write file
- `list_folder` lists all files in a folder
@@ -40,7 +57,16 @@ Necessary for saving and loading agent's state (preserving session).
Lets the agent execute non-interactive Shell commands and Python code. Python execution works only if Docker is available.
| Config variable | Details | Type | Default |
| ------------------------ | ---------------------------------------------------- | --------------------------- | ----------------- |
| `execute_local_commands` | Enable shell command execution | `bool` | `False` |
| `shell_command_control` | Controls which list is used | `"allowlist" \| "denylist"` | `"allowlist"` |
| `shell_allowlist` | List of allowed shell commands | `List[str]` | `[]` |
| `shell_denylist` | List of prohibited shell commands | `List[str]` | `[]` |
| `docker_container_name` | Name of the Docker container used for code execution | `str` | `"agent_sandbox"` |
**CommandProvider**
- `execute_shell` execute shell command
- `execute_shell_popen` execute shell command with popen
- `execute_python_code` execute Python code
@@ -50,38 +76,84 @@ Lets the agent execute non-interactive Shell commands and Python code. Python ex
Keeps track of agent's actions and their outcomes. Provides their summary to the prompt.
| Config variable | Details | Type | Default |
| ---------------------- | ------------------------------------------------------- | ----------- | ------------------ |
| `model_name` | Name of the llm model used to compress the history | `ModelName` | `"gpt-3.5-turbo"` |
| `max_tokens` | Maximum number of tokens to use for the history summary | `int` | `1024` |
| `spacy_language_model` | Language model used for summary chunking using spacy | `str` | `"en_core_web_sm"` |
**MessageProvider**
- Agent's progress summary
**AfterParse**
- Register agent's action
**ExecutionFailuer**
**ExecutionFailure**
- Rewinds the agent's action, so it isn't saved
**AfterExecute**
- Saves the agent's action result in the history
## `GitOperationsComponent`
Adds ability to interact with git repositories and GitHub.
| Config variable | Details | Type | Default |
| ----------------- | ----------------------------------------- | ----- | ------- |
| `github_username` | GitHub username, *ENV:* `GITHUB_USERNAME` | `str` | `None` |
| `github_api_key` | GitHub API key, *ENV:* `GITHUB_API_KEY` | `str` | `None` |
**CommandProvider**
- `clone_repository` used to clone a git repository
## `ImageGeneratorComponent`
Adds ability to generate images using various providers, see [Image Generation configuration](./../configuration/imagegen.md) to learn more.
Adds ability to generate images using various providers.
### Hugging Face
To use text-to-image models from Hugging Face, you need a Hugging Face API token.
Link to the appropriate settings page: [Hugging Face > Settings > Tokens](https://huggingface.co/settings/tokens)
### Stable Diffusion WebUI
It is possible to use your own self-hosted Stable Diffusion WebUI with AutoGPT. **Make sure you are running WebUI with `--api` enabled.**
| Config variable | Details | Type | Default |
| ------------------------- | ------------------------------------------------------------- | --------------------------------------- | --------------------------------- |
| `image_provider` | Image generation provider | `"dalle" \| "huggingface" \| "sdwebui"` | `"dalle"` |
| `huggingface_image_model` | Hugging Face image model, see [available models] | `str` | `"CompVis/stable-diffusion-v1-4"` |
| `huggingface_api_token` | Hugging Face API token, *ENV:* `HUGGINGFACE_API_TOKEN` | `str` | `None` |
| `sd_webui_url` | URL to self-hosted Stable Diffusion WebUI | `str` | `"http://localhost:7860"` |
| `sd_webui_auth` | Basic auth for Stable Diffusion WebUI, *ENV:* `SD_WEBUI_AUTH` | `str` of format `{username}:{password}` | `None` |
[available models]: https://huggingface.co/models?pipeline_tag=text-to-image
**CommandProvider**
- `generate_image` used to generate an image given a prompt
## `WebSearchComponent`
Allows agent to search the web.
Allows agent to search the web. Google credentials aren't required for DuckDuckGo. [Instructions on how to set up a Google API key](../../AutoGPT/configuration/search.md)
| Config variable | Details | Type | Default |
| -------------------------------- | ----------------------------------------------------------------------- | ----- | ------- |
| `google_api_key` | Google API key, *ENV:* `GOOGLE_API_KEY` | `str` | `None` |
| `google_custom_search_engine_id` | Google Custom Search Engine ID, *ENV:* `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` | `str` | `None` |
| `duckduckgo_max_attempts` | Maximum number of attempts to search using DuckDuckGo | `int` | `3` |
**DirectiveProvider**
- Resource information that it's possible to search the web
**CommandProvider**
- `search_web` used to search the web using DuckDuckGo
- `google` used to search the web using Google, requires API key
@@ -89,10 +161,20 @@ Allows agent to search the web.
Allows agent to read websites using Selenium.
| Config variable | Details | Type | Default |
| ----------------------------- | ------------------------------------------- | --------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
| `model_name` | Name of the llm model used to read websites | `ModelName` | `"gpt-3.5-turbo"` |
| `web_browser` | Web browser used by Selenium | `"chrome" \| "firefox" \| "safari" \| "edge"` | `"chrome"` |
| `headless` | Run browser in headless mode | `bool` | `True` |
| `user_agent` | User agent used by the browser | `str` | `"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"` |
| `browse_spacy_language_model` | Spacy language model used for chunking text | `str` | `"en_core_web_sm"` |
**DirectiveProvider**
- Resource information that it's possible to read websites
**CommandProvider**
- `read_website` used to read a specific url and look for specific topics or answer a question
## `ContextComponent`
@@ -100,9 +182,11 @@ Allows agent to read websites using Selenium.
Adds ability to keep up-to-date file and folder content in the prompt.
**MessageProvider**
- Content of elements in the context
**CommandProvider**
- `open_file` used to open a file into context
- `open_folder` used to open a folder into context
- `close_context_item` remove an item from the context
@@ -112,4 +196,5 @@ Adds ability to keep up-to-date file and folder content in the prompt.
Watches if agent is looping and switches to smart mode if necessary.
**AfterParse**
- Investigates what happened and switches to smart mode if necessary

View File

@@ -148,12 +148,12 @@ It gives an ability for the agent to ask user for input in the terminal.
yield self.ask_user
```
5. Since agent isn't always running in the terminal or interactive mode, we need to disable this component by setting `self._enabled` when it's not possible to ask for user input.
5. Since agent isn't always running in the terminal or interactive mode, we need to disable this component by setting `self._enabled=False` when it's not possible to ask for user input.
```py
def __init__(self, config: Config):
def __init__(self, interactive_mode: bool):
self.config = config
self._enabled = not config.noninteractive_mode
self._enabled = interactive_mode
```
The final component should look like this:
@@ -164,10 +164,10 @@ class MyUserInteractionComponent(CommandProvider):
"""Provides commands to interact with the user."""
# We pass config to check if we're in noninteractive mode
def __init__(self, config: Config):
def __init__(self, interactive_mode: bool):
self.config = config
# 5.
self._enabled = not config.noninteractive_mode
self._enabled = interactive_mode
# 4.
def get_commands(self) -> Iterator[Command]:
@@ -205,10 +205,10 @@ class MyAgent(Agent):
settings: AgentSettings,
llm_provider: MultiProvider,
file_storage: FileStorage,
legacy_config: Config,
app_config: Config,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
super().__init__(settings, llm_provider, file_storage, app_config)
# Disable the default user interaction component by overriding it
self.user_interaction = MyUserInteractionComponent()
```
@@ -222,14 +222,14 @@ class MyAgent(Agent):
settings: AgentSettings,
llm_provider: MultiProvider,
file_storage: FileStorage,
legacy_config: Config,
app_config: Config,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
super().__init__(settings, llm_provider, file_storage, app_config)
# Disable the default user interaction component
self.user_interaction = None
# Add our own component
self.my_user_interaction = MyUserInteractionComponent(legacy_config)
self.my_user_interaction = MyUserInteractionComponent(app_config)
```
## Learn more

View File

@@ -1,5 +1,11 @@
# Component Agents
!!! important
[Legacy plugins] no longer work with AutoGPT. They have been replaced by components,
although we're still working on a new system to load plug-in components.
[Legacy plugins]: https://github.com/Significant-Gravitas/Auto-GPT-Plugins
This guide explains the component-based architecture of AutoGPT agents. It's a new way of building agents that is more flexible and easier to extend. Components replace some agent's logic and plugins with a more modular and composable system.
An agent is composed of *components*, and each *component* implements a range of *protocols* (interfaces), each one providing a specific functionality, e.g. additional commands or messages. Each *protocol* is handled in a specific order, defined by the agent. This allows for a clear separation of concerns and a more modular design.

View File

@@ -15,7 +15,6 @@ nav:
- Options: AutoGPT/configuration/options.md
- Search: AutoGPT/configuration/search.md
- Voice: AutoGPT/configuration/voice.md
- Image Generation: AutoGPT/configuration/imagegen.md
- Usage: AutoGPT/usage.md
- Help us improve AutoGPT:
- Share your debug logs with us: AutoGPT/share-your-logs.md

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import copy
import inspect
import json
import logging
from abc import ABCMeta, abstractmethod
from typing import (
@@ -18,12 +19,13 @@ from typing import (
)
from colorama import Fore
from pydantic import BaseModel, Field, validator
from pydantic import BaseModel, Field, parse_raw_as, validator
from forge.agent import protocols
from forge.agent.components import (
AgentComponent,
ComponentEndpointError,
ConfigurableComponent,
EndpointPipelineError,
)
from forge.config.ai_directives import AIDirectives
@@ -45,6 +47,11 @@ DEFAULT_TRIGGERING_PROMPT = (
)
# HACK: This is a workaround wrapper to de/serialize component configs until pydantic v2
class ModelContainer(BaseModel):
    """Wrapper model holding a name -> model mapping.

    Used by `BaseAgent.dump_component_configs` so that a whole dict of
    component configs can be serialized with a single `.json()` call.
    """

    # Keyed by the config class name of each component
    models: dict[str, BaseModel]
class BaseAgentConfiguration(SystemConfiguration):
allow_fs_access: bool = UserConfigurable(default=False)
@@ -82,9 +89,6 @@ class BaseAgentConfiguration(SystemConfiguration):
defaults to 75% of `llm.max_tokens`.
"""
summary_max_tlength: Optional[int] = None
# TODO: move to ActionHistoryConfiguration
@validator("use_functions_api")
def validate_openai_functions(cls, v: bool, values: dict[str, Any]):
if v:
@@ -272,6 +276,30 @@ class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
raise e
return method_result
def dump_component_configs(self) -> str:
    """Serialize the configs of all configurable components to a JSON string.

    The result is a JSON object (indented by 4 spaces) mapping each config
    class name to that config's serialized fields. Components that are not
    a `ConfigurableComponent` are skipped. Because entries are keyed by
    config class name, components sharing a config class share one entry
    (the last one encountered wins).

    Returns:
        str: The JSON document containing the component configs.
    """
    configs_by_name = {
        candidate.config.__class__.__name__: candidate.config
        for candidate in self.components
        if isinstance(candidate, ConfigurableComponent)
    }
    # Round-trip through the container model so that pydantic takes care of
    # encoding the nested config models.
    container_json = ModelContainer(models=configs_by_name).json()
    decoded = parse_raw_as(dict[str, dict[str, Any]], container_json)
    return json.dumps(decoded["models"], indent=4)
def load_component_configs(self, serialized_configs: str):
    """Apply serialized component configs to this agent's components.

    `serialized_configs` is a JSON object mapping config class names to
    field values. For every configurable component whose config class name
    appears in it, the given values are laid over the component's current
    config (top-level keys only) and a new config instance is built from
    the merged data. Entries that match no component are ignored, and
    fields missing from an entry keep their current value.

    Args:
        serialized_configs: JSON string with the component configs.
    """
    overrides_by_name = parse_raw_as(dict[str, dict[str, Any]], serialized_configs)

    for component in self.components:
        if not isinstance(component, ConfigurableComponent):
            continue

        current_config = component.config
        config_cls = type(current_config)
        if config_cls.__name__ not in overrides_by_name:
            continue

        # Values from the serialized data take precedence over existing ones
        merged = current_config.dict()
        merged.update(overrides_by_name[config_cls.__name__])
        component.config = config_cls(**merged)
def _collect_components(self):
components = [
getattr(self, attr)

View File

@@ -1,9 +1,14 @@
from __future__ import annotations
from abc import ABC
from typing import Callable, TypeVar
from typing import Callable, ClassVar, Generic, Optional, TypeVar
T = TypeVar("T", bound="AgentComponent")
from pydantic import BaseModel
from forge.models.config import _update_user_config_from_env, deep_update
AC = TypeVar("AC", bound="AgentComponent")
BM = TypeVar("BM", bound=BaseModel)
class AgentComponent(ABC):
@@ -24,7 +29,7 @@ class AgentComponent(ABC):
"""Return the reason this component is disabled."""
return self._disabled_reason
def run_after(self: T, *components: type[AgentComponent] | AgentComponent) -> T:
def run_after(self: AC, *components: type[AgentComponent] | AgentComponent) -> AC:
"""Set the components that this component should run after."""
for component in components:
t = component if isinstance(component, type) else type(component)
@@ -33,6 +38,39 @@ class AgentComponent(ABC):
return self
class ConfigurableComponent(ABC, Generic[BM]):
    """Base class for components that carry a Pydantic configuration model.

    Subclasses must set the `config_class` class attribute to the model type
    they use; defining a subclass without it raises `NotImplementedError`.
    The active configuration is exposed through the `config` property.
    """

    config_class: ClassVar[type[BM]]  # type: ignore

    def __init__(self, configuration: Optional[BM]):
        """Store `configuration`, if given; otherwise leave the config unset
        so that it is created lazily on first access."""
        self._config: Optional[BM] = None
        if configuration is None:
            return
        self.config = configuration

    def __init_subclass__(cls, **kwargs):
        """Reject subclasses that do not declare `config_class`."""
        super().__init_subclass__(**kwargs)
        if getattr(cls, "config_class", None) is not None:
            return
        raise NotImplementedError(
            f"ConfigurableComponent subclass {cls.__name__} "
            "must define config_class class attribute."
        )

    @property
    def config(self) -> BM:
        """The component's configuration.

        If none has been set yet, a default `config_class()` instance is
        created (going through the setter) and returned.
        """
        if getattr(self, "_config", None) is None:
            self.config = self.config_class()
        return self._config  # type: ignore

    @config.setter
    def config(self, config: BM):
        # Only the first assignment (no config stored yet) is combined with
        # values loaded from environment variables; later assignments are
        # stored as given.
        is_first_assignment = getattr(self, "_config", None) is None
        if is_first_assignment:
            env_values = _update_user_config_from_env(config)
            config = self.config_class(**deep_update(config.dict(), env_values))
        self._config = config
class ComponentEndpointError(Exception):
"""Error of a single protocol method on a component."""

View File

@@ -30,6 +30,48 @@ class MyAgent(BaseAgent):
self.some_component = SomeComponent(self.hello_component)
```
## Component configuration
Each component can have its own configuration defined using a regular pydantic `BaseModel`.
To ensure the configuration is loaded from the file correctly, the component must inherit from `ConfigurableComponent[T]` where `T` is the configuration model it uses.
`ConfigurableComponent` provides a `config` attribute that holds the configuration instance.
It's possible to either set the `config` attribute directly or pass the configuration instance to the component's constructor.
Extra configuration (i.e. for components that are not part of the agent) can be passed and will be silently ignored. Extra config won't be applied even if the component is added later.
To see the configuration of built-in components visit [Built-in Components](./built-in-components.md).
```py
from pydantic import BaseModel
from forge.agent.components import ConfigurableComponent
class MyConfig(BaseModel):
some_value: str
class MyComponent(AgentComponent, ConfigurableComponent[MyConfig]):
def __init__(self, config: MyConfig):
super().__init__(config)
# This has the same effect as above:
# self.config = config
def get_some_value(self) -> str:
# Access the configuration like a regular model
return self.config.some_value
```
### Sensitive information
While it's possible to pass sensitive data to the configuration directly in code, it's recommended to use a `UserConfigurable(from_env="ENV_VAR_NAME", exclude=True)` field for sensitive data like API keys.
The data will be loaded from the environment variable but keep in mind that value passed in code takes precedence.
All fields, even excluded ones (`exclude=True`) will be loaded when the configuration is loaded from the file.
Exclusion allows you to skip them during *serialization*; a non-excluded `SecretStr` will be serialized literally as the string `"**********"`.
```py
from pydantic import BaseModel, SecretStr
from forge.models.config import UserConfigurable
class SensitiveConfig(BaseModel):
api_key: SecretStr = UserConfigurable(from_env="API_KEY", exclude=True)
```
## Ordering components
The execution order of components is important because some may depend on the results of the previous ones.
@@ -72,6 +114,7 @@ class MyAgent(Agent):
## Disabling components
You can control which components are enabled by setting their `_enabled` attribute.
Components are *enabled* by default.
Either provide a `bool` value or a `Callable[[], bool]`, which will be checked each time
the component is about to be executed. This way you can dynamically enable or disable
components based on some conditions.

View File

@@ -1,38 +1,54 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, Iterator, Optional
from typing import Callable, Iterator, Optional
from pydantic import BaseModel
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import AfterExecute, AfterParse, MessageProvider
from forge.llm.prompting.utils import indent
from forge.llm.providers import ChatMessage, MultiProvider
if TYPE_CHECKING:
from forge.config.config import Config
from forge.llm.providers.multi import ModelName
from forge.llm.providers.openai import OpenAIModelName
from .model import ActionResult, AnyProposal, Episode, EpisodicActionHistory
class ActionHistoryComponent(MessageProvider, AfterParse[AnyProposal], AfterExecute):
class ActionHistoryConfiguration(BaseModel):
    """Configuration model for `ActionHistoryComponent`."""

    # Passed to the event history's `handle_compression` together with
    # `spacy_language_model` after each executed action.
    model_name: ModelName = OpenAIModelName.GPT3
    """Name of the llm model used to compress the history"""
    # Token budget used when compiling the progress summary message
    max_tokens: int = 1024
    """Maximum number of tokens to use up with generated history messages"""
    spacy_language_model: str = "en_core_web_sm"
    """Language model used for summary chunking using spacy"""
class ActionHistoryComponent(
MessageProvider,
AfterParse[AnyProposal],
AfterExecute,
ConfigurableComponent[ActionHistoryConfiguration],
):
"""Keeps track of the event history and provides a summary of the steps."""
config_class = ActionHistoryConfiguration
def __init__(
self,
event_history: EpisodicActionHistory[AnyProposal],
max_tokens: int,
count_tokens: Callable[[str], int],
legacy_config: Config,
llm_provider: MultiProvider,
config: Optional[ActionHistoryConfiguration] = None,
) -> None:
ConfigurableComponent.__init__(self, config)
self.event_history = event_history
self.max_tokens = max_tokens
self.count_tokens = count_tokens
self.legacy_config = legacy_config
self.llm_provider = llm_provider
def get_messages(self) -> Iterator[ChatMessage]:
if progress := self._compile_progress(
self.event_history.episodes,
self.max_tokens,
self.config.max_tokens,
self.count_tokens,
):
yield ChatMessage.system(f"## Progress on your Task so far\n\n{progress}")
@@ -43,7 +59,7 @@ class ActionHistoryComponent(MessageProvider, AfterParse[AnyProposal], AfterExec
async def after_execute(self, result: ActionResult) -> None:
self.event_history.register_result(result)
await self.event_history.handle_compression(
self.llm_provider, self.legacy_config
self.llm_provider, self.config.model_name, self.config.spacy_language_model
)
def _compile_progress(

View File

@@ -8,11 +8,11 @@ from pydantic.generics import GenericModel
from forge.content_processing.text import summarize_text
from forge.llm.prompting.utils import format_numbered_list, indent
from forge.llm.providers.multi import ModelName
from forge.models.action import ActionResult, AnyProposal
from forge.models.utils import ModelWithSummary
if TYPE_CHECKING:
from forge.config.config import Config
from forge.llm.providers import MultiProvider
@@ -108,7 +108,10 @@ class EpisodicActionHistory(GenericModel, Generic[AnyProposal]):
self.cursor = len(self.episodes)
async def handle_compression(
self, llm_provider: MultiProvider, app_config: Config
self,
llm_provider: MultiProvider,
model_name: ModelName,
spacy_model: str,
) -> None:
"""Compresses each episode in the action history using an LLM.
@@ -131,7 +134,8 @@ class EpisodicActionHistory(GenericModel, Generic[AnyProposal]):
episode.format(),
instruction=compress_instruction,
llm_provider=llm_provider,
config=app_config,
model_name=model_name,
spacy_model=spacy_model,
)
for episode in episodes_to_summarize
]

View File

@@ -1,9 +1,4 @@
from .code_executor import (
ALLOWLIST_CONTROL,
DENYLIST_CONTROL,
CodeExecutionError,
CodeExecutorComponent,
)
from .code_executor import CodeExecutionError, CodeExecutorComponent
__all__ = [
"ALLOWLIST_CONTROL",

View File

@@ -5,16 +5,16 @@ import shlex
import string
import subprocess
from pathlib import Path
from typing import Iterator
from typing import Iterator, Literal, Optional
import docker
from docker.errors import DockerException, ImageNotFound, NotFound
from docker.models.containers import Container as DockerContainer
from pydantic import BaseModel, Field
from forge.agent import BaseAgentSettings
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.file_storage import FileStorage
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import (
@@ -25,9 +25,6 @@ from forge.utils.exceptions import (
logger = logging.getLogger(__name__)
ALLOWLIST_CONTROL = "allowlist"
DENYLIST_CONTROL = "denylist"
def we_are_running_in_a_docker_container() -> bool:
"""Check if we are running in a Docker container
@@ -56,15 +53,45 @@ class CodeExecutionError(CommandExecutionError):
"""The operation (an attempt to run arbitrary code) returned an error"""
class CodeExecutorComponent(CommandProvider):
class CodeExecutorConfiguration(BaseModel):
    """Configuration model for `CodeExecutorComponent`."""

    # When False, the shell commands are not offered by the component
    execute_local_commands: bool = False
    """Enable shell command execution"""
    # "allowlist": only commands in `shell_allowlist` may run;
    # "denylist": every command except those in `shell_denylist` may run.
    shell_command_control: Literal["allowlist", "denylist"] = "allowlist"
    """Controls which list is used"""
    shell_allowlist: list[str] = Field(default_factory=list)
    """List of allowed shell commands"""
    shell_denylist: list[str] = Field(default_factory=list)
    """List of prohibited shell commands"""
    # If left empty or at the default, the component appends a random suffix
    # so that different agents don't end up sharing one container.
    docker_container_name: str = "agent_sandbox"
    """Name of the Docker container used for code execution"""
class CodeExecutorComponent(
CommandProvider, ConfigurableComponent[CodeExecutorConfiguration]
):
"""Provides commands to execute Python code and shell commands."""
config_class = CodeExecutorConfiguration
def __init__(
self, workspace: FileStorage, state: BaseAgentSettings, config: Config
self,
workspace: FileStorage,
config: Optional[CodeExecutorConfiguration] = None,
):
ConfigurableComponent.__init__(self, config)
self.workspace = workspace
self.state = state
self.legacy_config = config
# Change container name if it's empty or default to prevent different agents
# from using the same container
default_container_name = self.config.__fields__["docker_container_name"].default
if (
not self.config.docker_container_name
or self.config.docker_container_name == default_container_name
):
random_suffix = "".join(random.choices(string.ascii_lowercase, k=8))
self.config.docker_container_name = (
f"{default_container_name}_{random_suffix}"
)
if not we_are_running_in_a_docker_container() and not is_docker_available():
logger.info(
@@ -72,7 +99,7 @@ class CodeExecutorComponent(CommandProvider):
"The code execution commands will not be available."
)
if not self.legacy_config.execute_local_commands:
if not self.config.execute_local_commands:
logger.info(
"Local shell commands are disabled. To enable them,"
" set EXECUTE_LOCAL_COMMANDS to 'True' in your config file."
@@ -83,7 +110,7 @@ class CodeExecutorComponent(CommandProvider):
yield self.execute_python_code
yield self.execute_python_file
if self.legacy_config.execute_local_commands:
if self.config.execute_local_commands:
yield self.execute_shell
yield self.execute_shell_popen
@@ -192,7 +219,7 @@ class CodeExecutorComponent(CommandProvider):
logger.debug("App is not running in a Docker container")
return self._run_python_code_in_docker(file_path, args)
def validate_command(self, command_line: str, config: Config) -> tuple[bool, bool]:
def validate_command(self, command_line: str) -> tuple[bool, bool]:
"""Check whether a command is allowed and whether it may be executed in a shell.
If shell command control is enabled, we disallow executing in a shell, because
@@ -211,10 +238,10 @@ class CodeExecutorComponent(CommandProvider):
command_name = shlex.split(command_line)[0]
if config.shell_command_control == ALLOWLIST_CONTROL:
return command_name in config.shell_allowlist, False
elif config.shell_command_control == DENYLIST_CONTROL:
return command_name not in config.shell_denylist, False
if self.config.shell_command_control == "allowlist":
return command_name in self.config.shell_allowlist, False
elif self.config.shell_command_control == "denylist":
return command_name not in self.config.shell_denylist, False
else:
return True, True
@@ -238,9 +265,7 @@ class CodeExecutorComponent(CommandProvider):
Returns:
str: The output of the command
"""
allow_execute, allow_shell = self.validate_command(
command_line, self.legacy_config
)
allow_execute, allow_shell = self.validate_command(command_line)
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
@@ -287,9 +312,7 @@ class CodeExecutorComponent(CommandProvider):
Returns:
str: Description of the fact that the process started and its id
"""
allow_execute, allow_shell = self.validate_command(
command_line, self.legacy_config
)
allow_execute, allow_shell = self.validate_command(command_line)
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
@@ -320,12 +343,10 @@ class CodeExecutorComponent(CommandProvider):
"""Run a Python script in a Docker container"""
file_path = self.workspace.get_path(filename)
try:
assert self.state.agent_id, "Need Agent ID to attach Docker container"
client = docker.from_env()
image_name = "python:3-alpine"
container_is_fresh = False
container_name = f"{self.state.agent_id}_sandbox"
container_name = self.config.docker_container_name
with self.workspace.mount() as local_path:
try:
container: DockerContainer = client.containers.get(

View File

@@ -3,7 +3,10 @@ import os
from pathlib import Path
from typing import Iterator, Optional
from pydantic import BaseModel
from forge.agent import BaseAgentSettings
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.file_storage.base import FileStorage
@@ -13,67 +16,89 @@ from forge.utils.file_operations import decode_textual_file
logger = logging.getLogger(__name__)
class FileManagerComponent(DirectiveProvider, CommandProvider):
class FileManagerConfiguration(BaseModel):
    """Configuration model for `FileManagerComponent`.

    Neither field has a default here; when no configuration is passed,
    `FileManagerComponent` derives both paths from the agent's ID.
    """

    storage_path: str
    """Path to agent files, e.g. state"""
    workspace_path: str
    """Path to files that agent has access to"""

    class Config:
        # Prevent mutation of the configuration
        # as this wouldn't be reflected in the file storage
        allow_mutation = False
class FileManagerComponent(
DirectiveProvider, CommandProvider, ConfigurableComponent[FileManagerConfiguration]
):
"""
Adds general file manager (e.g. Agent state),
workspace manager (e.g. Agent output files) support and
commands to perform operations on files and folders.
"""
files: FileStorage
"""Agent-related files, e.g. state, logs.
Use `workspace` to access the agent's workspace files."""
workspace: FileStorage
"""Workspace that the agent has access to, e.g. for reading/writing files.
Use `files` to access agent-related files, e.g. state, logs."""
config_class = FileManagerConfiguration
STATE_FILE = "state.json"
"""The name of the file where the agent's state is stored."""
def __init__(self, state: BaseAgentSettings, file_storage: FileStorage):
self.state = state
def __init__(
self,
file_storage: FileStorage,
agent_state: BaseAgentSettings,
config: Optional[FileManagerConfiguration] = None,
):
"""Initialise the FileManagerComponent.
Either `agent_id` or `config` must be provided.
if not state.agent_id:
Args:
file_storage (FileStorage): The file storage instance to use.
state (BaseAgentSettings): The agent's state.
config (FileManagerConfiguration, optional): The configuration for
the file manager. Defaults to None.
"""
if not agent_state.agent_id:
raise ValueError("Agent must have an ID.")
self.files = file_storage.clone_with_subroot(f"agents/{state.agent_id}/")
self.workspace = file_storage.clone_with_subroot(
f"agents/{state.agent_id}/workspace"
)
self.agent_state = agent_state
if not config:
storage_path = f"agents/{self.agent_state.agent_id}/"
workspace_path = f"agents/{self.agent_state.agent_id}/workspace"
ConfigurableComponent.__init__(
self,
FileManagerConfiguration(
storage_path=storage_path, workspace_path=workspace_path
),
)
else:
ConfigurableComponent.__init__(self, config)
self.storage = file_storage.clone_with_subroot(self.config.storage_path)
"""Agent-related files, e.g. state, logs.
Use `workspace` to access the agent's workspace files."""
self.workspace = file_storage.clone_with_subroot(self.config.workspace_path)
"""Workspace that the agent has access to, e.g. for reading/writing files.
Use `storage` to access agent-related files, e.g. state, logs."""
self._file_storage = file_storage
async def save_state(self, save_as: Optional[str] = None) -> None:
"""Save the agent's state to the state file."""
state: BaseAgentSettings = getattr(self, "state")
if save_as:
temp_id = state.agent_id
state.agent_id = save_as
self._file_storage.make_dir(f"agents/{save_as}")
async def save_state(self, save_as_id: Optional[str] = None) -> None:
"""Save the agent's data and state."""
if save_as_id:
self._file_storage.make_dir(f"agents/{save_as_id}")
# Save state
await self._file_storage.write_file(
f"agents/{save_as}/{self.STATE_FILE}", state.json()
f"agents/{save_as_id}/{self.STATE_FILE}", self.agent_state.json()
)
# Copy workspace
self._file_storage.copy(
f"agents/{temp_id}/workspace",
f"agents/{save_as}/workspace",
self.config.workspace_path,
f"agents/{save_as_id}/workspace",
)
state.agent_id = temp_id
else:
await self.files.write_file(self.files.root / self.STATE_FILE, state.json())
def change_agent_id(self, new_id: str):
"""Change the agent's ID and update the file storage accordingly."""
state: BaseAgentSettings = getattr(self, "state")
# Rename the agent's files and workspace
self._file_storage.rename(f"agents/{state.agent_id}", f"agents/{new_id}")
# Update the file storage objects
self.files = self._file_storage.clone_with_subroot(f"agents/{new_id}/")
self.workspace = self._file_storage.clone_with_subroot(
f"agents/{new_id}/workspace"
)
state.agent_id = new_id
await self.storage.write_file(
self.storage.root / self.STATE_FILE, self.agent_state.json()
)
def get_resources(self) -> Iterator[str]:
yield "The ability to read and write files."

View File

@@ -1,23 +1,36 @@
from pathlib import Path
from typing import Iterator
from typing import Iterator, Optional
from git.repo import Repo
from pydantic import BaseModel, SecretStr
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.models.config import UserConfigurable
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError
from forge.utils.url_validator import validate_url
class GitOperationsComponent(CommandProvider):
class GitOperationsConfiguration(BaseModel):
    """Configuration model for `GitOperationsComponent`.

    The component is only enabled when both fields are set.
    """

    # GitHub username; can be provided via the GITHUB_USERNAME env variable
    github_username: Optional[str] = UserConfigurable(from_env="GITHUB_USERNAME")
    # GitHub API key; can be provided via the GITHUB_API_KEY env variable.
    # Marked `exclude=True` so it is left out when the config is serialized.
    github_api_key: Optional[SecretStr] = UserConfigurable(
        from_env="GITHUB_API_KEY", exclude=True
    )
class GitOperationsComponent(
CommandProvider, ConfigurableComponent[GitOperationsConfiguration]
):
"""Provides commands to perform Git operations."""
def __init__(self, config: Config):
self._enabled = bool(config.github_username and config.github_api_key)
config_class = GitOperationsConfiguration
def __init__(self, config: Optional[GitOperationsConfiguration] = None):
ConfigurableComponent.__init__(self, config)
self._enabled = bool(self.config.github_username and self.config.github_api_key)
self._disabled_reason = "Configure github_username and github_api_key."
self.legacy_config = config
def get_commands(self) -> Iterator[Command]:
yield self.clone_repository
@@ -48,9 +61,13 @@ class GitOperationsComponent(CommandProvider):
str: The result of the clone operation.
"""
split_url = url.split("//")
auth_repo_url = (
f"//{self.legacy_config.github_username}:"
f"{self.legacy_config.github_api_key}@".join(split_url)
api_key = (
self.config.github_api_key.get_secret_value()
if self.config.github_api_key
else None
)
auth_repo_url = f"//{self.config.github_username}:" f"{api_key}@".join(
split_url
)
try:
Repo.clone_from(url=auth_repo_url, to_path=clone_path)

View File

@@ -1,5 +1,3 @@
"""Commands to generate images based on text input"""
import io
import json
import logging
@@ -7,35 +5,61 @@ import time
import uuid
from base64 import b64decode
from pathlib import Path
from typing import Iterator
from typing import Iterator, Literal, Optional
import requests
from openai import OpenAI
from PIL import Image
from pydantic import BaseModel, SecretStr
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.file_storage import FileStorage
from forge.llm.providers.openai import OpenAICredentials
from forge.models.config import UserConfigurable
from forge.models.json_schema import JSONSchema
logger = logging.getLogger(__name__)
class ImageGeneratorComponent(CommandProvider):
class ImageGeneratorConfiguration(BaseModel):
# Settings model for ImageGeneratorComponent (assigned as its `config_class`).
# OpenAI credentials for the "dalle" provider are NOT part of this model;
# they are passed to the component constructor separately.

# Preferred image backend; restricted to the three listed literals.
image_provider: Literal["dalle", "huggingface", "sdwebui"] = "dalle"
# Hugging Face model id, interpolated into the inference API URL
# (https://api-inference.huggingface.co/models/<model>).
huggingface_image_model: str = "CompVis/stable-diffusion-v1-4"
# Hugging Face API token, sent as the Bearer token of the inference request.
# Populated from the HUGGINGFACE_API_TOKEN environment variable.
# NOTE(review): exclude=True presumably keeps the token out of serialized
# component configs — confirm.
huggingface_api_token: Optional[SecretStr] = UserConfigurable(
from_env="HUGGINGFACE_API_TOKEN", exclude=True
)
# Base URL of the Stable Diffusion web UI; requests are posted to
# <sd_webui_url>/sdapi/v1/txt2img.
sd_webui_url: str = "http://localhost:7860"
# Optional credentials for the Stable Diffusion web UI; the secret value is
# split on ":" into a username and a password for session auth.
# Populated from the SD_WEBUI_AUTH environment variable.
sd_webui_auth: Optional[SecretStr] = UserConfigurable(
from_env="SD_WEBUI_AUTH", exclude=True
)
class ImageGeneratorComponent(
CommandProvider, ConfigurableComponent[ImageGeneratorConfiguration]
):
"""A component that provides commands to generate images from text prompts."""
def __init__(self, workspace: FileStorage, config: Config):
self._enabled = bool(config.image_provider)
config_class = ImageGeneratorConfiguration
def __init__(
self,
workspace: FileStorage,
config: Optional[ImageGeneratorConfiguration] = None,
openai_credentials: Optional[OpenAICredentials] = None,
):
"""openai_credentials only needed for `dalle` provider."""
ConfigurableComponent.__init__(self, config)
self.openai_credentials = openai_credentials
self._enabled = bool(self.config.image_provider)
self._disabled_reason = "No image provider set."
self.workspace = workspace
self.legacy_config = config
def get_commands(self) -> Iterator[Command]:
if (
self.legacy_config.openai_credentials
or self.legacy_config.huggingface_api_token
or self.legacy_config.sd_webui_auth
self.openai_credentials
or self.config.huggingface_api_token
or self.config.sd_webui_auth
):
yield self.generate_image
@@ -48,7 +72,7 @@ class ImageGeneratorComponent(CommandProvider):
),
"size": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The size of the image",
description="The size of the image [256, 512, 1024]",
required=False,
),
},
@@ -65,22 +89,21 @@ class ImageGeneratorComponent(CommandProvider):
str: The filename of the image
"""
filename = self.workspace.root / f"{str(uuid.uuid4())}.jpg"
cfg = self.legacy_config
if cfg.openai_credentials and (
cfg.image_provider == "dalle"
or not (cfg.huggingface_api_token or cfg.sd_webui_url)
if self.openai_credentials and (
self.config.image_provider == "dalle"
or not (self.config.huggingface_api_token or self.config.sd_webui_url)
):
return self.generate_image_with_dalle(prompt, filename, size)
elif cfg.huggingface_api_token and (
cfg.image_provider == "huggingface"
or not (cfg.openai_credentials or cfg.sd_webui_url)
elif self.config.huggingface_api_token and (
self.config.image_provider == "huggingface"
or not (self.openai_credentials or self.config.sd_webui_url)
):
return self.generate_image_with_hf(prompt, filename)
elif cfg.sd_webui_url and (
cfg.image_provider == "sdwebui" or cfg.sd_webui_auth
elif self.config.sd_webui_url and (
self.config.image_provider == "sdwebui" or self.config.sd_webui_auth
):
return self.generate_image_with_sd_webui(prompt, filename, size)
@@ -96,13 +119,15 @@ class ImageGeneratorComponent(CommandProvider):
Returns:
str: The filename of the image
"""
API_URL = f"https://api-inference.huggingface.co/models/{self.legacy_config.huggingface_image_model}" # noqa: E501
if self.legacy_config.huggingface_api_token is None:
API_URL = f"https://api-inference.huggingface.co/models/{self.config.huggingface_image_model}" # noqa: E501
if self.config.huggingface_api_token is None:
raise ValueError(
"You need to set your Hugging Face API token in the config file."
)
headers = {
"Authorization": f"Bearer {self.legacy_config.huggingface_api_token}",
"Authorization": (
f"Bearer {self.config.huggingface_api_token.get_secret_value()}"
),
"X-Use-Cache": "false",
}
@@ -156,7 +181,7 @@ class ImageGeneratorComponent(CommandProvider):
Returns:
str: The filename of the image
"""
assert self.legacy_config.openai_credentials # otherwise this tool is disabled
assert self.openai_credentials # otherwise this tool is disabled
# Check for supported image sizes
if size not in [256, 512, 1024]:
@@ -169,7 +194,10 @@ class ImageGeneratorComponent(CommandProvider):
# TODO: integrate in `forge.llm.providers`(?)
response = OpenAI(
api_key=self.legacy_config.openai_credentials.api_key.get_secret_value()
api_key=self.openai_credentials.api_key.get_secret_value(),
organization=self.openai_credentials.organization.get_secret_value()
if self.openai_credentials.organization
else None,
).images.generate(
prompt=prompt,
n=1,
@@ -208,13 +236,13 @@ class ImageGeneratorComponent(CommandProvider):
"""
# Create a session and set the basic auth if needed
s = requests.Session()
if self.legacy_config.sd_webui_auth:
username, password = self.legacy_config.sd_webui_auth.split(":")
if self.config.sd_webui_auth:
username, password = self.config.sd_webui_auth.get_secret_value().split(":")
s.auth = (username, password or "")
# Generate the images
response = requests.post(
f"{self.legacy_config.sd_webui_url}/sdapi/v1/txt2img",
f"{self.config.sd_webui_url}/sdapi/v1/txt2img",
json={
"prompt": prompt,
"negative_prompt": negative_prompt,

View File

@@ -4,7 +4,6 @@ import click
from forge.agent.protocols import CommandProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.models.json_schema import JSONSchema
from forge.utils.const import ASK_COMMAND
@@ -12,9 +11,6 @@ from forge.utils.const import ASK_COMMAND
class UserInteractionComponent(CommandProvider):
"""Provides commands to interact with the user."""
def __init__(self, config: Config):
self._enabled = not config.noninteractive_mode
def get_commands(self) -> Iterator[Command]:
yield self.ask_user

View File

@@ -22,7 +22,7 @@ class WatchdogComponent(AfterParse[AnyProposal]):
def __init__(
self,
config: "BaseAgentConfiguration",
config: BaseAgentConfiguration,
event_history: EpisodicActionHistory[AnyProposal],
):
self.config = config

View File

@@ -1,30 +1,44 @@
import json
import logging
import time
from typing import Iterator
from typing import Iterator, Optional
from duckduckgo_search import DDGS
from pydantic import BaseModel, SecretStr
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.models.config import UserConfigurable
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import ConfigurationError
DUCKDUCKGO_MAX_ATTEMPTS = 3
logger = logging.getLogger(__name__)
class WebSearchComponent(DirectiveProvider, CommandProvider):
class WebSearchConfiguration(BaseModel):
# Settings model for WebSearchComponent (assigned as its `config_class`).
# The `google` command is offered only when both Google fields are set;
# `web_search` is always offered.

# Google API key, passed as `developerKey` when building the
# "customsearch" v1 service. Populated from the GOOGLE_API_KEY env variable.
# NOTE(review): exclude=True presumably keeps the key out of serialized
# component configs — confirm.
google_api_key: Optional[SecretStr] = UserConfigurable(
from_env="GOOGLE_API_KEY", exclude=True
)
# Custom Search Engine id, passed as the `cx` parameter of the search
# request. Populated from the GOOGLE_CUSTOM_SEARCH_ENGINE_ID env variable.
google_custom_search_engine_id: Optional[SecretStr] = UserConfigurable(
from_env="GOOGLE_CUSTOM_SEARCH_ENGINE_ID", exclude=True
)
# Upper bound of the attempt loop in the DuckDuckGo search command.
duckduckgo_max_attempts: int = 3
class WebSearchComponent(
DirectiveProvider, CommandProvider, ConfigurableComponent[WebSearchConfiguration]
):
"""Provides commands to search the web."""
def __init__(self, config: Config):
self.legacy_config = config
config_class = WebSearchConfiguration
def __init__(self, config: Optional[WebSearchConfiguration] = None):
ConfigurableComponent.__init__(self, config)
if (
not self.legacy_config.google_api_key
or not self.legacy_config.google_custom_search_engine_id
not self.config.google_api_key
or not self.config.google_custom_search_engine_id
):
logger.info(
"Configure google_api_key and custom_search_engine_id "
@@ -37,10 +51,7 @@ class WebSearchComponent(DirectiveProvider, CommandProvider):
def get_commands(self) -> Iterator[Command]:
yield self.web_search
if (
self.legacy_config.google_api_key
and self.legacy_config.google_custom_search_engine_id
):
if self.config.google_api_key and self.config.google_custom_search_engine_id:
yield self.google
@command(
@@ -74,7 +85,7 @@ class WebSearchComponent(DirectiveProvider, CommandProvider):
search_results = []
attempts = 0
while attempts < DUCKDUCKGO_MAX_ATTEMPTS:
while attempts < self.config.duckduckgo_max_attempts:
if not query:
return json.dumps(search_results)
@@ -136,17 +147,25 @@ class WebSearchComponent(DirectiveProvider, CommandProvider):
from googleapiclient.errors import HttpError
try:
# Get the Google API key and Custom Search Engine ID from the config file
api_key = self.legacy_config.google_api_key
custom_search_engine_id = self.legacy_config.google_custom_search_engine_id
# Should be the case if this command is enabled:
assert self.config.google_api_key
assert self.config.google_custom_search_engine_id
# Initialize the Custom Search API service
service = build("customsearch", "v1", developerKey=api_key)
service = build(
"customsearch",
"v1",
developerKey=self.config.google_api_key.get_secret_value(),
)
# Send the search query and retrieve the results
result = (
service.cse()
.list(q=query, cx=custom_search_engine_id, num=num_results)
.list(
q=query,
cx=self.config.google_custom_search_engine_id.get_secret_value(),
num=num_results,
)
.execute()
)

View File

@@ -3,10 +3,11 @@ import logging
import re
from pathlib import Path
from sys import platform
from typing import Iterator, Type
from typing import Iterator, Literal, Optional, Type
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
from pydantic import BaseModel
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeDriverService
@@ -27,12 +28,14 @@ from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.config.config import Config
from forge.content_processing.html import extract_hyperlinks, format_hyperlinks
from forge.content_processing.text import extract_information, summarize_text
from forge.llm.providers import ChatModelInfo, MultiProvider
from forge.llm.providers import MultiProvider
from forge.llm.providers.multi import ModelName
from forge.llm.providers.openai import OpenAIModelName
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError, TooMuchOutputError
from forge.utils.url_validator import validate_url
@@ -51,18 +54,38 @@ class BrowsingError(CommandExecutionError):
"""An error occurred while trying to browse the page"""
class WebSeleniumComponent(DirectiveProvider, CommandProvider):
class WebSeleniumConfiguration(BaseModel):
# Settings model for WebSeleniumComponent (assigned as its `config_class`).

# Also used for token counting of raw page content and forwarded as
# `model_name` to summarize_text / extract_information.
model_name: ModelName = OpenAIModelName.GPT3
"""Name of the llm model used to read websites"""
# Selects which browser options class is instantiated for the driver.
web_browser: Literal["chrome", "firefox", "safari", "edge"] = "chrome"
"""Web browser used by Selenium"""
headless: bool = True
"""Run browser in headless mode"""
# Passed to the browser as a `user-agent=<value>` option argument.
user_agent: str = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
)
"""User agent used by the browser"""
# Forwarded as `spacy_model` to the text-processing helpers.
browse_spacy_language_model: str = "en_core_web_sm"
"""Spacy language model used for chunking text"""
class WebSeleniumComponent(
DirectiveProvider, CommandProvider, ConfigurableComponent[WebSeleniumConfiguration]
):
"""Provides commands to browse the web using Selenium."""
config_class = WebSeleniumConfiguration
def __init__(
self,
config: Config,
llm_provider: MultiProvider,
model_info: ChatModelInfo,
data_dir: Path,
config: Optional[WebSeleniumConfiguration] = None,
):
self.legacy_config = config
ConfigurableComponent.__init__(self, config)
self.llm_provider = llm_provider
self.model_info = model_info
self.data_dir = data_dir
def get_resources(self) -> Iterator[str]:
yield "Ability to read websites."
@@ -129,7 +152,7 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
"""
driver = None
try:
driver = await self.open_page_in_browser(url, self.legacy_config)
driver = await self.open_page_in_browser(url)
text = self.scrape_text_with_selenium(driver)
links = self.scrape_links_with_selenium(driver, url)
@@ -141,7 +164,7 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
elif get_raw_content:
if (
output_tokens := self.llm_provider.count_tokens(
text, self.model_info.name
text, self.config.model_name
)
) > MAX_RAW_CONTENT_LENGTH:
oversize_factor = round(output_tokens / MAX_RAW_CONTENT_LENGTH, 1)
@@ -228,7 +251,7 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
return format_hyperlinks(hyperlinks)
async def open_page_in_browser(self, url: str, config: Config) -> WebDriver:
async def open_page_in_browser(self, url: str) -> WebDriver:
"""Open a browser window and load a web page using Selenium
Params:
@@ -248,11 +271,11 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
"safari": SafariOptions,
}
options: BrowserOptions = options_available[config.selenium_web_browser]()
options.add_argument(f"user-agent={config.user_agent}")
options: BrowserOptions = options_available[self.config.web_browser]()
options.add_argument(f"user-agent={self.config.user_agent}")
if isinstance(options, FirefoxOptions):
if config.selenium_headless:
if self.config.headless:
options.headless = True # type: ignore
options.add_argument("--disable-gpu")
driver = FirefoxDriver(
@@ -274,13 +297,11 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
options.add_argument("--remote-debugging-port=9222")
options.add_argument("--no-sandbox")
if config.selenium_headless:
if self.config.headless:
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
self._sideload_chrome_extensions(
options, config.app_data_dir / "assets" / "crx"
)
self._sideload_chrome_extensions(options, self.data_dir / "assets" / "crx")
if (chromium_driver_path := Path("/usr/bin/chromedriver")).exists():
chrome_service = ChromeDriverService(str(chromium_driver_path))
@@ -361,7 +382,8 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
text,
topics_of_interest=topics_of_interest,
llm_provider=self.llm_provider,
config=self.legacy_config,
model_name=self.config.model_name,
spacy_model=self.config.browse_spacy_language_model,
)
return "\n".join(f"* {i}" for i in information)
else:
@@ -369,6 +391,7 @@ class WebSeleniumComponent(DirectiveProvider, CommandProvider):
text,
question=question,
llm_provider=self.llm_provider,
config=self.legacy_config,
model_name=self.config.model_name,
spacy_model=self.config.browse_spacy_language_model,
)
return result

View File

@@ -3,12 +3,10 @@ This module contains configuration models and helpers for AutoGPT Forge.
"""
from .ai_directives import AIDirectives
from .ai_profile import AIProfile
from .config import Config, ConfigBuilder, assert_config_has_required_llm_api_keys
from .base import BaseConfig
__all__ = [
"assert_config_has_required_llm_api_keys",
"AIProfile",
"AIDirectives",
"Config",
"ConfigBuilder",
"BaseConfig",
]

View File

@@ -0,0 +1,16 @@
from forge.file_storage import FileStorageBackendName
from forge.models.config import SystemSettings, UserConfigurable
from forge.speech.say import TTSConfig
class BaseConfig(SystemSettings):
# Base settings object for a forge agent: holds text-to-speech settings and
# the choice of file storage backend.
name: str = "Base configuration"
description: str = "Default configuration for forge agent."
# TTS configuration
# Defaults to a TTSConfig built with no arguments.
tts_config: TTSConfig = TTSConfig()
# File storage
# Defaults to the LOCAL backend; can be set through the FILE_STORAGE_BACKEND
# environment variable.
file_storage_backend: FileStorageBackendName = UserConfigurable(
default=FileStorageBackendName.LOCAL, from_env="FILE_STORAGE_BACKEND"
)

View File

@@ -3,16 +3,14 @@ from __future__ import annotations
import logging
import math
from typing import TYPE_CHECKING, Iterator, Optional, TypeVar
from typing import Iterator, Optional, TypeVar
import spacy
if TYPE_CHECKING:
from forge.config.config import Config
from forge.json.parsing import extract_list_from_json
from forge.llm.prompting import ChatPrompt
from forge.llm.providers import ChatMessage, ModelTokenizer, MultiProvider
from forge.llm.providers.multi import ModelName
logger = logging.getLogger(__name__)
@@ -57,7 +55,8 @@ def chunk_content(
async def summarize_text(
text: str,
llm_provider: MultiProvider,
config: Config,
model_name: ModelName,
spacy_model: str = "en_core_web_sm",
question: Optional[str] = None,
instruction: Optional[str] = None,
) -> tuple[str, list[tuple[str, str]]]:
@@ -82,7 +81,8 @@ async def summarize_text(
text=text,
instruction=instruction,
llm_provider=llm_provider,
config=config,
model_name=model_name,
spacy_model=spacy_model,
)
@@ -90,7 +90,8 @@ async def extract_information(
source_text: str,
topics_of_interest: list[str],
llm_provider: MultiProvider,
config: Config,
model_name: ModelName,
spacy_model: str = "en_core_web_sm",
) -> list[str]:
fmt_topics_list = "\n".join(f"* {topic}." for topic in topics_of_interest)
instruction = (
@@ -106,7 +107,8 @@ async def extract_information(
instruction=instruction,
output_type=list[str],
llm_provider=llm_provider,
config=config,
model_name=model_name,
spacy_model=spacy_model,
)
@@ -114,7 +116,8 @@ async def _process_text(
text: str,
instruction: str,
llm_provider: MultiProvider,
config: Config,
model_name: ModelName,
spacy_model: str = "en_core_web_sm",
output_type: type[str | list[str]] = str,
) -> tuple[str, list[tuple[str, str]]] | list[str]:
"""Process text using the OpenAI API for summarization or information extraction
@@ -123,7 +126,8 @@ async def _process_text(
text (str): The text to process.
instruction (str): Additional instruction for processing.
llm_provider: LLM provider to use.
config (Config): The global application config.
model_name: The name of the llm model to use.
spacy_model: The spaCy model to use for sentence splitting.
output_type: `str` for summaries or `list[str]` for piece-wise info extraction.
Returns:
@@ -133,13 +137,11 @@ async def _process_text(
if not text.strip():
raise ValueError("No content")
model = config.fast_llm
text_tlength = llm_provider.count_tokens(text, model)
text_tlength = llm_provider.count_tokens(text, model_name)
logger.debug(f"Text length: {text_tlength} tokens")
max_result_tokens = 500
max_chunk_length = llm_provider.get_token_limit(model) - max_result_tokens - 50
max_chunk_length = llm_provider.get_token_limit(model_name) - max_result_tokens - 50
logger.debug(f"Max chunk length: {max_chunk_length} tokens")
if text_tlength < max_chunk_length:
@@ -157,7 +159,7 @@ async def _process_text(
response = await llm_provider.create_chat_completion(
model_prompt=prompt.messages,
model_name=model,
model_name=model_name,
temperature=0.5,
max_output_tokens=max_result_tokens,
completion_parser=lambda s: (
@@ -182,9 +184,9 @@ async def _process_text(
chunks = list(
split_text(
text,
config=config,
max_chunk_length=max_chunk_length,
tokenizer=llm_provider.get_tokenizer(model),
tokenizer=llm_provider.get_tokenizer(model_name),
spacy_model=spacy_model,
)
)
@@ -196,7 +198,8 @@ async def _process_text(
instruction=instruction,
output_type=output_type,
llm_provider=llm_provider,
config=config,
model_name=model_name,
spacy_model=spacy_model,
)
processed_results.extend(
chunk_result if output_type == list[str] else [chunk_result]
@@ -212,7 +215,8 @@ async def _process_text(
"Combine these partial summaries into one."
),
llm_provider=llm_provider,
config=config,
model_name=model_name,
spacy_model=spacy_model,
)
return summary.strip(), [
(processed_results[i], chunks[i][0]) for i in range(0, len(chunks))
@@ -221,9 +225,9 @@ async def _process_text(
def split_text(
text: str,
config: Config,
max_chunk_length: int,
tokenizer: ModelTokenizer,
spacy_model: str = "en_core_web_sm",
with_overlap: bool = True,
) -> Iterator[tuple[str, int]]:
"""
@@ -231,7 +235,7 @@ def split_text(
Args:
text (str): The text to split.
config (Config): Config object containing the Spacy model setting.
spacy_model (str): The spaCy model to use for sentence splitting.
max_chunk_length (int, optional): The maximum length of a chunk.
tokenizer (ModelTokenizer): Tokenizer to use for determining chunk length.
with_overlap (bool, optional): Whether to allow overlap between chunks.
@@ -251,7 +255,7 @@ def split_text(
n_chunks = math.ceil(text_length / max_chunk_length)
target_chunk_length = math.ceil(text_length / n_chunks)
nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model)
nlp: spacy.language.Language = spacy.load(spacy_model)
nlp.add_pipe("sentencizer")
doc = nlp(text)
sentences = [sentence.text.strip() for sentence in doc.sents]

View File

@@ -16,6 +16,7 @@ def UserConfigurable(
default_factory: Optional[Callable[[], T]] = None,
from_env: Optional[str | Callable[[], T | None]] = None,
description: str = "",
exclude: bool = False,
**kwargs,
) -> T:
# TODO: use this to auto-generate docs for the application configuration
@@ -25,6 +26,7 @@ def UserConfigurable(
default_factory=default_factory,
from_env=from_env,
description=description,
exclude=exclude,
**kwargs,
user_configurable=True,
)