From 3b0cd9518d8a79a7c99ae92de21356a02a3cc5bb Mon Sep 17 00:00:00 2001 From: "A.Daee" Date: Thu, 25 Jul 2024 12:32:34 +0330 Subject: [PATCH] feat(froge): Improvement in web components (#7068) - Add `duckduckgo_backend` field to `WebSearchComponent` configuration - Add `selenium_proxy` to `WebSeleniumComponent` configuration - Update docs --- autogpt/.env.template | 1 + docs/content/forge/components/built-in-components.md | 12 +++++++----- forge/forge/components/web/search.py | 7 +++++-- forge/forge/components/web/selenium.py | 5 +++++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/autogpt/.env.template b/autogpt/.env.template index 8d4894988c..80fa39263a 100644 --- a/autogpt/.env.template +++ b/autogpt/.env.template @@ -105,6 +105,7 @@ ## HUGGINGFACE_API_TOKEN - HuggingFace API token (Default: None) # HUGGINGFACE_API_TOKEN= + ### Stable Diffusion (IMAGE_PROVIDER=sdwebui) ## SD_WEBUI_AUTH - Stable Diffusion Web UI username:password pair (Default: None) diff --git a/docs/content/forge/components/built-in-components.md b/docs/content/forge/components/built-in-components.md index 77ae76909a..39f607a212 100644 --- a/docs/content/forge/components/built-in-components.md +++ b/docs/content/forge/components/built-in-components.md @@ -155,11 +155,12 @@ Allows agent to search the web. 
Google credentials aren't required for DuckDuckG ### `WebSearchConfiguration` -| Config variable | Details | Type | Default | -| -------------------------------- | ----------------------------------------------------------------------- | ----- | ------- | -| `google_api_key` | Google API key, *ENV:* `GOOGLE_API_KEY` | `str` | `None` | -| `google_custom_search_engine_id` | Google Custom Search Engine ID, *ENV:* `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` | `str` | `None` | -| `duckduckgo_max_attempts` | Maximum number of attempts to search using DuckDuckGo | `int` | `3` | +| Config variable | Details | Type | Default | +| -------------------------------- | ----------------------------------------------------------------------- | --------------------------- | ------- | +| `google_api_key` | Google API key, *ENV:* `GOOGLE_API_KEY` | `str` | `None` | +| `google_custom_search_engine_id` | Google Custom Search Engine ID, *ENV:* `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` | `str` | `None` | +| `duckduckgo_max_attempts` | Maximum number of attempts to search using DuckDuckGo | `int` | `3` | +| `duckduckgo_backend` | Backend to be used by the DuckDuckGo SDK | `"api" \| "html" \| "lite"` | `"api"` | ### DirectiveProvider @@ -183,6 +184,7 @@ Allows agent to read websites using Selenium. 
| `headless` | Run browser in headless mode | `bool` | `True` | | `user_agent` | User agent used by the browser | `str` | `"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"` | | `browse_spacy_language_model` | Spacy language model used for chunking text | `str` | `"en_core_web_sm"` | +| `selenium_proxy` | HTTP proxy to use with Selenium | `str` | `None` | ### DirectiveProvider diff --git a/forge/forge/components/web/search.py b/forge/forge/components/web/search.py index a381fd950a..2cc2b4b0b3 100644 --- a/forge/forge/components/web/search.py +++ b/forge/forge/components/web/search.py @@ -1,7 +1,7 @@ import json import logging import time -from typing import Iterator, Optional +from typing import Iterator, Literal, Optional from duckduckgo_search import DDGS from pydantic import BaseModel, SecretStr @@ -24,6 +24,7 @@ class WebSearchConfiguration(BaseModel): None, from_env="GOOGLE_CUSTOM_SEARCH_ENGINE_ID", exclude=True ) duckduckgo_max_attempts: int = 3 + duckduckgo_backend: Literal["api", "html", "lite"] = "api" class WebSearchComponent( @@ -89,7 +90,9 @@ class WebSearchComponent( if not query: return json.dumps(search_results) - search_results = DDGS().text(query, max_results=num_results) + search_results = DDGS().text( + query, max_results=num_results, backend=self.config.duckduckgo_backend + ) if search_results: break diff --git a/forge/forge/components/web/selenium.py b/forge/forge/components/web/selenium.py index 69f5427e00..62042cce5f 100644 --- a/forge/forge/components/web/selenium.py +++ b/forge/forge/components/web/selenium.py @@ -68,6 +68,8 @@ class WebSeleniumConfiguration(BaseModel): """User agent used by the browser""" browse_spacy_language_model: str = "en_core_web_sm" """Spacy language model used for chunking text""" + selenium_proxy: Optional[str] = None + """Http proxy to use with Selenium""" class WebSeleniumComponent( @@ -301,6 +303,9 @@ class WebSeleniumComponent( 
options.add_argument("--headless=new") options.add_argument("--disable-gpu") + if self.config.selenium_proxy: + options.add_argument(f"--proxy-server={self.config.selenium_proxy}") + self._sideload_chrome_extensions(options, self.data_dir / "assets" / "crx") if (chromium_driver_path := Path("/usr/bin/chromedriver")).exists():