mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
Pass TestSearch benchmark consistently (Add browse_website TOKENS_TO_TRIGGER_SUMMARY) (#5092)
* Added SUMMARIZATION_TRIGGER_LENGTH: browse_website won't summarize content that's shorter than SUMMARIZATION_TRIGGER_LENGTH. It defaults to 250 characters, which is approximately 50 tokens.
* Refactor BrowserOptions
* Use tokens instead of length to trigger summarization
* Bugfix
* fix: Always return links even if not summarizing
  feat: Increase the number of links returned from 5 to 20

---------

Co-authored-by: lc0rp <2609411+lc0rp@users.noreply.github.com>
Co-authored-by: James Collins <collijk@uw.edu>
This commit is contained in:
@@ -2,13 +2,15 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from autogpt.llm.utils.token_counter import count_string_tokens
|
||||
|
||||
COMMAND_CATEGORY = "web_browse"
|
||||
COMMAND_CATEGORY_TITLE = "Web Browsing"
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from sys import platform
|
||||
from typing import Optional, Type
|
||||
from typing import Optional
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium.common.exceptions import WebDriverException
|
||||
@@ -16,6 +18,7 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
from selenium.webdriver.chrome.service import Service as ChromeDriverService
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.options import ArgOptions as BrowserOptions
|
||||
from selenium.webdriver.edge.options import Options as EdgeOptions
|
||||
from selenium.webdriver.edge.service import Service as EdgeDriverService
|
||||
from selenium.webdriver.edge.webdriver import WebDriver as EdgeDriver
|
||||
@@ -38,9 +41,9 @@ from autogpt.memory.vector import MemoryItem, get_memory
|
||||
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
|
||||
from autogpt.url_utils.validators import validate_url
|
||||
|
||||
BrowserOptions = ChromeOptions | EdgeOptions | FirefoxOptions | SafariOptions
|
||||
|
||||
FILE_DIR = Path(__file__).parent.parent
|
||||
TOKENS_TO_TRIGGER_SUMMARY = 50
|
||||
LINKS_TO_RETURN = 20
|
||||
|
||||
|
||||
@command(
|
||||
@@ -64,25 +67,30 @@ def browse_website(url: str, question: str, agent: Agent) -> str:
|
||||
question (str): The question asked by the user
|
||||
|
||||
Returns:
|
||||
Tuple[str, WebDriver]: The answer and links to the user and the webdriver
|
||||
str: The answer and links to the user
|
||||
"""
|
||||
driver = None
|
||||
try:
|
||||
driver, text = scrape_text_with_selenium(url, agent)
|
||||
add_header(driver)
|
||||
if TOKENS_TO_TRIGGER_SUMMARY < count_string_tokens(text, agent.llm.name):
|
||||
text = summarize_memorize_webpage(url, text, question, agent, driver)
|
||||
|
||||
links = scrape_links_with_selenium(driver, url)
|
||||
|
||||
# Limit links to LINKS_TO_RETURN
|
||||
if len(links) > LINKS_TO_RETURN:
|
||||
links = links[:LINKS_TO_RETURN]
|
||||
|
||||
return f"Answer gathered from website: {text}\n\nLinks: {links}"
|
||||
except WebDriverException as e:
|
||||
# These errors are often quite long and include lots of context.
|
||||
# Just grab the first line.
|
||||
msg = e.msg.split("\n")[0]
|
||||
return f"Error: {msg}"
|
||||
|
||||
add_header(driver)
|
||||
summary = summarize_memorize_webpage(url, text, question, agent, driver)
|
||||
links = scrape_links_with_selenium(driver, url)
|
||||
|
||||
# Limit links to 5
|
||||
if len(links) > 5:
|
||||
links = links[:5]
|
||||
close_browser(driver)
|
||||
return f"Answer gathered from website: {summary}\n\nLinks: {links}"
|
||||
finally:
|
||||
if driver:
|
||||
close_browser(driver)
|
||||
|
||||
|
||||
def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
|
||||
@@ -96,7 +104,7 @@ def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
|
||||
"""
|
||||
logging.getLogger("selenium").setLevel(logging.CRITICAL)
|
||||
|
||||
options_available: dict[str, Type[BrowserOptions]] = {
|
||||
options_available: dict[str, BrowserOptions] = {
|
||||
"chrome": ChromeOptions,
|
||||
"edge": EdgeOptions,
|
||||
"firefox": FirefoxOptions,
|
||||
|
||||
Reference in New Issue
Block a user