Compare commits

...

22 Commits

Author SHA1 Message Date
Xingyao Wang
d88ebc971f override browsergym playwright import 2025-05-15 17:02:07 +00:00
Xingyao Wang
46e39739c4 update lock 2025-05-15 16:46:12 +00:00
Xingyao Wang
eda353e0d7 Merge commit '04d585513c8952efbe56aaefbf11754846320360' into feature/patchright-integration 2025-05-15 16:45:35 +00:00
Xingyao Wang
c1e3ae6dac try to simplify 2025-05-15 06:09:22 +00:00
Xingyao Wang
e95831ec09 change to patchright_chromium 2025-05-15 05:48:13 +00:00
Xingyao Wang
04c7f31498 move unit test 2025-05-15 05:47:48 +00:00
Xingyao Wang
e5f81a283a Merge commit '74f8d68ac57b2c9df7aa8bd9009b51eefac69201' into feature/patchright-integration 2025-05-14 19:53:55 +00:00
openhands
74f8d68ac5 Regenerate poetry.lock file after merging from main 2025-05-14 19:52:40 +00:00
Xingyao Wang
a2e028d707 remove patchright readme 2025-05-14 19:52:14 +00:00
openhands
8eb2281f21 Merge main into feature/patchright-integration 2025-05-14 19:51:46 +00:00
openhands
1c3052702e Regenerate poetry.lock file after merging from main 2025-05-14 10:19:51 +00:00
openhands
ca4051f585 Merge main into feature/patchright-integration 2025-05-14 10:19:00 +00:00
openhands
11c7a39c12 Remove poetry.lock for regeneration 2025-05-14 10:18:48 +00:00
openhands
0ec75bc0d0 Regenerate poetry.lock file to fix compatibility issues with patchright integration 2025-05-13 16:51:37 +00:00
openhands
9ca9cb8f3a Merge main into feature/patchright-integration 2025-05-13 16:33:57 +00:00
openhands
a95e43fc03 Fix patchright integration tests and improve documentation 2025-05-13 11:30:02 +00:00
openhands
c5f9910dc2 Fix test_patchright_import by importing playwright.sync_api after use_patchright() 2025-05-13 11:17:34 +00:00
openhands
ca5df82804 Fix linting and test issues in patchright integration 2025-05-13 11:12:18 +00:00
openhands
a4b8d08b2f Fix linting and test issues in patchright integration 2025-05-13 11:08:05 +00:00
openhands
72a13cc42d Add patchright to pyproject.toml and update Makefile to use it 2025-05-09 14:22:06 +00:00
openhands
728f8e239c Update Makefile to use patchright instead of playwright for Chromium installation 2025-05-09 14:20:09 +00:00
openhands
24c93478ac Add utility to use patchright as a drop-in replacement for playwright 2025-05-09 14:17:12 +00:00
6 changed files with 2471 additions and 2102 deletions

View File

@@ -154,21 +154,20 @@ install-python-dependencies:
fi
@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
if [ -f "/etc/manjaro-release" ]; then \
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
poetry run pip install playwright; \
poetry run playwright install chromium; \
echo "$(BLUE)Detected Manjaro Linux. Installing Patchright dependencies...$(RESET)"; \
poetry run patchright install chromium; \
else \
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
echo "Running playwright install --with-deps chromium..."; \
poetry run playwright install --with-deps chromium; \
if [ ! -f cache/patchright_chromium_is_installed.txt ]; then \
echo "Installing patchright chromium..."; \
poetry run patchright install chromium; \
mkdir -p cache; \
touch cache/playwright_chromium_is_installed.txt; \
touch cache/patchright_chromium_is_installed.txt; \
else \
echo "Setup already done. Skipping playwright installation."; \
echo "Setup already done. Skipping patchright installation."; \
fi \
fi \
else \
echo "Skipping Playwright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
echo "Skipping Patchright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
fi
@echo "$(GREEN)Python dependencies installed successfully.$(RESET)"

View File

@@ -4,6 +4,10 @@ import multiprocessing
import time
import uuid
from openhands.utils.playwright_patchright_util import use_patchright # noqa F401
use_patchright() # noqa F401
# NOTE: this overrides the playwright import in browsergym
import browsergym.core # noqa F401 (we register the openended task as a gym environment)
import gymnasium as gym
import html2text

View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""Utility to use patchright as a drop-in replacement for playwright.
This module provides a function to patch all imports of playwright to use patchright instead.
It uses Python's import system to intercept imports of playwright modules and redirect them
to the corresponding patchright modules.
Usage:
from openhands.utils.playwright_patchright_util import use_patchright
use_patchright() # Call this before any imports of browsergym or playwright
"""
import importlib
import importlib.abc
import importlib.machinery
import importlib.util
import logging
import sys
import types
from typing import Optional, Sequence
logger = logging.getLogger(__name__)
class PlaywrightToPatchrightLoader(importlib.abc.Loader):
    """Loader that satisfies a ``playwright`` import with a ``patchright`` module.

    The loader does not execute any source itself: ``create_module`` imports
    the corresponding patchright module through the normal import system and
    hands that already-initialised module object back to the import machinery,
    which then registers it under the requested playwright name.

    Attributes are plain data recorded at construction time:
        fullname: the playwright module name that was requested.
        patchright_name: the patchright module name to import instead.
        original_loader: the loader found for the patchright module (stored
            for reference only; this class never calls it).
    """

    def __init__(
        self, fullname: str, patchright_name: str, original_loader: importlib.abc.Loader
    ) -> None:
        self.fullname = fullname
        self.patchright_name = patchright_name
        self.original_loader = original_loader

    def create_module(
        self, spec: importlib.machinery.ModuleSpec
    ) -> Optional[types.ModuleType]:
        """Import and return the patchright module that backs this spec.

        Raises:
            ImportError: if the patchright module cannot be imported. The
                error is propagated on purpose: returning ``None`` here would
                make the import machinery create an empty default module,
                and because ``exec_module`` is a no-op the caller would
                silently receive an empty ``playwright`` module.
        """
        try:
            return importlib.import_module(self.patchright_name)
        except ImportError as e:
            raise ImportError(
                f'Failed to import {self.patchright_name} '
                f'(required to satisfy import of {self.fullname}): {e}',
                name=self.fullname,
            ) from e

    def exec_module(self, module: types.ModuleType) -> None:
        """Do nothing: the module returned by ``create_module`` is already executed."""
class PlaywrightToPatchrightFinder(importlib.abc.MetaPathFinder):
    """Meta path finder that redirects ``playwright`` imports to ``patchright``.

    Only the ``playwright`` package itself and its submodules
    (``playwright.*``) are redirected. Unrelated top-level packages whose
    names merely begin with the same letters (for example
    ``playwright_stealth``) are left to the regular finders.
    """

    def find_spec(
        self,
        fullname: str,
        path: Optional[Sequence[str]] = None,
        target: Optional[types.ModuleType] = None,
    ) -> Optional[importlib.machinery.ModuleSpec]:
        """Return a spec for ``fullname`` backed by the matching patchright module.

        Args:
            fullname: fully qualified name of the module being imported.
            path: accepted for the finder protocol; not used.
            target: accepted for the finder protocol; not used.

        Returns:
            A ``ModuleSpec`` named ``fullname`` whose loader imports the
            patchright counterpart, or ``None`` when the name is not a
            playwright module or no patchright counterpart can be found
            (letting the remaining finders handle the import).
        """
        # Match the package exactly, the same way use_patchright() purges
        # sys.modules; a bare startswith('playwright') would also capture
        # packages such as 'playwright_stealth'.
        if fullname != 'playwright' and not fullname.startswith('playwright.'):
            return None

        # Swap the leading package name, keeping any dotted suffix.
        patchright_name = 'patchright' + fullname[len('playwright') :]

        try:
            spec = importlib.util.find_spec(patchright_name)
            if spec is None:
                return None

            loader = PlaywrightToPatchrightLoader(
                fullname, patchright_name, spec.loader or importlib.abc.Loader()
            )

            # Same origin/state as the patchright spec, but published under
            # the requested playwright name and routed through our loader.
            new_spec = importlib.machinery.ModuleSpec(
                name=fullname,
                loader=loader,
                origin=spec.origin,
                loader_state=spec.loader_state,
                is_package=spec.submodule_search_locations is not None,
            )

            # Packages must keep their search locations so that submodule
            # imports can be resolved.
            if spec.submodule_search_locations is not None:
                new_spec.submodule_search_locations = spec.submodule_search_locations

            return new_spec
        except (ImportError, AttributeError, ValueError) as e:
            # importlib.util.find_spec raises ValueError when the module is
            # already in sys.modules with no usable __spec__; treat that like
            # any other lookup failure instead of breaking the import.
            logger.warning('Failed to find spec for %s: %s', patchright_name, e)
            return None
def use_patchright() -> None:
    """Configure the system to use patchright as a drop-in replacement for playwright.

    This function:
    1. Checks that patchright can be imported.
    2. Removes any existing playwright modules from ``sys.modules`` so that
       later imports are resolved again.
    3. Installs a ``PlaywrightToPatchrightFinder`` at the front of
       ``sys.meta_path`` (only once; repeated calls do not add another).

    Call this function before importing any modules that use playwright.

    Raises:
        ImportError: if patchright cannot be imported. The original import
            failure is attached as the cause.
    """
    try:
        importlib.import_module('patchright')
    except ImportError as err:
        message = (
            "Patchright is not installed. Please install it with 'pip install patchright'."
        )
        logger.error(message)
        raise ImportError(message) from err

    # Snapshot the keys first: sys.modules is mutated while we purge it.
    stale_modules = [
        name
        for name in list(sys.modules)
        if name == 'playwright' or name.startswith('playwright.')
    ]
    for name in stale_modules:
        # pop() rather than del: tolerate an entry that vanished meanwhile.
        sys.modules.pop(name, None)

    # Nothing more to do when a redirecting finder is already registered.
    if any(
        isinstance(finder, PlaywrightToPatchrightFinder) for finder in sys.meta_path
    ):
        return

    # Front of the list so it is consulted before the default finders.
    sys.meta_path.insert(0, PlaywrightToPatchrightFinder())
    logger.info('Patchright will be used as a drop-in replacement for playwright.')
if __name__ == '__main__':
    # Manual smoke test: install the redirect, import playwright through it,
    # and drive a headless Chromium session against example.com.
    logging.basicConfig(level=logging.INFO)

    # The redirect must be active before the playwright imports below.
    use_patchright()

    import playwright

    # Show which package really answered the ``playwright`` import.
    print(f'Imported module: {playwright.__name__}')
    print(f'Module file: {playwright.__file__}')

    from playwright.sync_api import sync_playwright

    print(f'sync_playwright function: {sync_playwright}')

    # End-to-end check: launch, open a page, read its title, shut down.
    with sync_playwright() as pw:
        chromium = pw.chromium.launch(headless=True)
        tab = chromium.new_page()
        tab.goto('https://example.com')
        print(f'Page title: {tab.title()}')
        chromium.close()

4315
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -35,6 +35,7 @@ types-toml = "*"
numpy = "*"
json-repair = "*"
browsergym-core = "0.13.3" # integrate browsergym-core as the browsing interface
patchright = "*" # undetected version of playwright for browser automation
html2text = "*"
e2b = ">=1.0.5,<1.4.0"
pexpect = "*"

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""Test script for the playwright_patchright_util module."""
import logging
import sys
from openhands.utils.playwright_patchright_util import use_patchright
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def test_patchright_import():
    """Test that importing playwright after use_patchright() yields patchright.

    Besides checking that both names are registered in ``sys.modules``, this
    verifies that each playwright entry is the very same module object as its
    patchright counterpart, i.e. that the import was really redirected.
    """
    # Use patchright as a replacement for playwright
    use_patchright()

    # Now import playwright - this must be imported after use_patchright() is called
    import playwright.sync_api  # noqa: F401

    # Check that the import worked under the playwright name
    assert 'playwright' in sys.modules
    assert 'playwright.sync_api' in sys.modules

    # Check that the patchright modules were loaded to back it
    assert 'patchright' in sys.modules
    assert 'patchright.sync_api' in sys.modules

    # Check that the actual module is patchright: the playwright names must be
    # aliases of the patchright module objects, not separate modules.
    assert sys.modules['playwright'] is sys.modules['patchright']
    assert sys.modules['playwright.sync_api'] is sys.modules['patchright.sync_api']
def test_patchright_functionality():
    """Drive a real headless Chromium session through the redirected playwright import."""
    # The redirect has to be installed before playwright is imported below.
    use_patchright()

    import playwright
    from playwright.sync_api import sync_playwright

    # Report which package actually backs the ``playwright`` name.
    print(f'Actual playwright package name: {playwright.__name__}')
    print(f'Actual playwright package file: {playwright.__file__}')
    assert 'patchright' in playwright.__file__

    # Everything below goes through the playwright API but runs patchright.
    with sync_playwright() as pw:
        chromium = pw.chromium.launch(headless=True)
        tab = chromium.new_page()
        tab.goto('https://example.com')

        # The page loaded if its title can be read and looks as expected.
        assert 'Example' in tab.title()

        chromium.close()