Pass TestSearch benchmark consistently (Add browse_website TOKENS_TO_TRIGGER_SUMMARY) (#5092)

* Added SUMMARIZATION_TRIGGER_LENGTH: browse_website won't summarize content that's shorter than SUMMARIZATION_TRIGGER_LENGTH. It defaults to 250 characters, which is approximately 50 tokens.
* Refactor BrowserOptions
* Use tokens instead of length to trigger summarization
* Bugfix
* fix: Always return links even if not summarizing
  feat: Increase the number of links returned from 5 to 20

---------

Co-authored-by: lc0rp <2609411+lc0rp@users.noreply.github.com>
Co-authored-by: James Collins <collijk@uw.edu>
2026-04-08 03:00:28 -04:00 · 2023-08-01 14:48:13 -04:00
parent a593c32727
commit 3a2d08fb41
1 changed files with 23 additions and 15 deletions
--- a/autogpt/commands/web_selenium.py
+++ b/autogpt/commands/web_selenium.py
@@ -2,13 +2,15 @@

 from __future__ import annotations

+from autogpt.llm.utils.token_counter import count_string_tokens
+
 COMMAND_CATEGORY = "web_browse"
 COMMAND_CATEGORY_TITLE = "Web Browsing"

 import logging
 from pathlib import Path
 from sys import platform
-from typing import Optional, Type
+from typing import Optional

 from bs4 import BeautifulSoup
 from selenium.common.exceptions import WebDriverException
@@ -16,6 +18,7 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions
 from selenium.webdriver.chrome.service import Service as ChromeDriverService
 from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
 from selenium.webdriver.common.by import By
+from selenium.webdriver.common.options import ArgOptions as BrowserOptions
 from selenium.webdriver.edge.options import Options as EdgeOptions
 from selenium.webdriver.edge.service import Service as EdgeDriverService
 from selenium.webdriver.edge.webdriver import WebDriver as EdgeDriver
@@ -38,9 +41,9 @@ from autogpt.memory.vector import MemoryItem, get_memory
 from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
 from autogpt.url_utils.validators import validate_url

-BrowserOptions = ChromeOptions | EdgeOptions | FirefoxOptions | SafariOptions
-
 FILE_DIR = Path(__file__).parent.parent
+TOKENS_TO_TRIGGER_SUMMARY = 50
+LINKS_TO_RETURN = 20


@command(
@@ -64,25 +67,30 @@ def browse_website(url: str, question: str, agent: Agent) -> str:
        question (str): The question asked by the user

    Returns:
-        Tuple[str, WebDriver]: The answer and links to the user and the webdriver
+        str: The answer (summarized if long enough) and the links found on the page
    """
+    driver = None
    try:
        driver, text = scrape_text_with_selenium(url, agent)
+        add_header(driver)
+        if TOKENS_TO_TRIGGER_SUMMARY < count_string_tokens(text, agent.llm.name):
+            text = summarize_memorize_webpage(url, text, question, agent, driver)
+
+        links = scrape_links_with_selenium(driver, url)
+
+        # Limit links to LINKS_TO_RETURN
+        if len(links) > LINKS_TO_RETURN:
+            links = links[:LINKS_TO_RETURN]
+
+        return f"Answer gathered from website: {text}\n\nLinks: {links}"
    except WebDriverException as e:
        # These errors are often quite long and include lots of context.
        # Just grab the first line.
        msg = e.msg.split("\n")[0]
        return f"Error: {msg}"
-
-    add_header(driver)
-    summary = summarize_memorize_webpage(url, text, question, agent, driver)
-    links = scrape_links_with_selenium(driver, url)
-
-    # Limit links to 5
-    if len(links) > 5:
-        links = links[:5]
-    close_browser(driver)
-    return f"Answer gathered from website: {summary}\n\nLinks: {links}"
+    finally:
+        if driver:
+            close_browser(driver)


 def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
@@ -96,7 +104,7 @@ def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
    """
    logging.getLogger("selenium").setLevel(logging.CRITICAL)

-    options_available: dict[str, Type[BrowserOptions]] = {
+    options_available: dict[str, BrowserOptions] = {
        "chrome": ChromeOptions,
        "edge": EdgeOptions,
        "firefox": FirefoxOptions,