Merge pull request #1473 from ickma/support-headless-chrome-mode

Add support for running Chrome in Headless mode.
This commit is contained in:
Reinier van der Leer
2023-04-19 01:14:49 +02:00
committed by GitHub
3 changed files with 19 additions and 6 deletions

View File

@@ -9,9 +9,6 @@ BROWSE_CHUNK_MAX_LENGTH=8192
# USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
# AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
AI_SETTINGS_FILE=ai_settings.yaml
# USE_WEB_BROWSER - Sets the web-browser drivers to use with selenium (defaults to chrome).
# Note: set this to either 'chrome', 'firefox', or 'safari' depending on your current browser
# USE_WEB_BROWSER=chrome
################################################################################
### LLM PROVIDER
@@ -134,9 +131,16 @@ GITHUB_API_KEY=github_pat_123
GITHUB_USERNAME=your-github-username
################################################################################
### SEARCH PROVIDER
### WEB BROWSING
################################################################################
### BROWSER
# USE_WEB_BROWSER - Sets the web-browser drivers to use with selenium (defaults to chrome).
# HEADLESS_BROWSER - Whether to run the browser in headless mode (defaults to True; only the exact value 'True' enables it, any other value disables it)
# Note: set USE_WEB_BROWSER to either 'chrome', 'firefox', or 'safari' depending on your current browser
# USE_WEB_BROWSER=chrome
# HEADLESS_BROWSER=True
### GOOGLE
# GOOGLE_API_KEY - Google API key (Example: my-google-api-key)
# CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id)

View File

@@ -81,7 +81,12 @@ def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
if platform == "linux" or platform == "linux2":
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--remote-debugging-port=9222")
options.add_argument("--no-sandbox")
if CFG.selenium_headless:
options.add_argument("--headless")
options.add_argument("--disable-gpu")
driver = webdriver.Chrome(
executable_path=ChromeDriverManager().install(), options=options
)

View File

@@ -26,7 +26,6 @@ class Config(metaclass=Singleton):
self.allow_downloads = False
self.skip_news = False
self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml")
self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
@@ -88,7 +87,11 @@ class Config(metaclass=Singleton):
"HUGGINGFACE_AUDIO_TO_TEXT_MODEL"
)
# User agent headers to use when browsing web
# Selenium browser settings
self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
self.selenium_headless = os.getenv("HEADLESS_BROWSER", "True") == "True"
# User agent header to use when making HTTP requests
# Some websites might completely deny the request with an error code if
# no user agent is provided.
self.user_agent = os.getenv(
@@ -96,6 +99,7 @@ class Config(metaclass=Singleton):
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36"
" (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
)
self.redis_host = os.getenv("REDIS_HOST", "localhost")
self.redis_port = os.getenv("REDIS_PORT", "6379")
self.redis_password = os.getenv("REDIS_PASSWORD", "")