mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
3 Commits
openhands/
...
openhands-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
55797c834f | ||
|
|
50c7125c36 | ||
|
|
8b28ebdd5f |
@@ -61,63 +61,93 @@ class BrowserEnv:
|
||||
try:
|
||||
self.process = multiprocessing.Process(target=self.browser_process)
|
||||
self.process.start()
|
||||
|
||||
# Give the process a moment to initialize before checking alive status
|
||||
# This might be too long?
|
||||
time.sleep(1)
|
||||
|
||||
# Check if process is still running
|
||||
if not self.process.is_alive():
|
||||
exitcode = self.process.exitcode
|
||||
logger.error(f'Browser process failed to start (exit code: {exitcode})')
|
||||
raise BrowserInitException(f'Browser process failed to start with exit code {exitcode}')
|
||||
|
||||
# Now check if we can communicate with it
|
||||
if not self.check_alive(timeout=200):
|
||||
logger.error('Browser process started but not responding')
|
||||
self.close()
|
||||
raise BrowserInitException('Failed to start browser environment: process not responding')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to start browser process: {e}')
|
||||
self.close()
|
||||
raise
|
||||
|
||||
if not self.check_alive(timeout=200):
|
||||
self.close()
|
||||
raise BrowserInitException('Failed to start browser environment.')
|
||||
|
||||
def browser_process(self) -> None:
|
||||
if self.eval_mode:
|
||||
assert self.browsergym_eval_env is not None
|
||||
logger.info('Initializing browser env for web browsing evaluation.')
|
||||
if not self.browsergym_eval_env.startswith('browsergym/'):
|
||||
self.browsergym_eval_env = 'browsergym/' + self.browsergym_eval_env
|
||||
if 'visualwebarena' in self.browsergym_eval_env:
|
||||
import browsergym.visualwebarena # noqa F401 register visualwebarena tasks as gym environments
|
||||
import nltk
|
||||
try:
|
||||
if self.eval_mode:
|
||||
assert self.browsergym_eval_env is not None
|
||||
logger.info('Initializing browser env for web browsing evaluation.')
|
||||
if not self.browsergym_eval_env.startswith('browsergym/'):
|
||||
self.browsergym_eval_env = 'browsergym/' + self.browsergym_eval_env
|
||||
if 'visualwebarena' in self.browsergym_eval_env:
|
||||
import browsergym.visualwebarena # noqa F401 register visualwebarena tasks as gym environments
|
||||
import nltk
|
||||
|
||||
nltk.download('punkt_tab')
|
||||
elif 'webarena' in self.browsergym_eval_env:
|
||||
import browsergym.webarena # noqa F401 register webarena tasks as gym environments
|
||||
elif 'miniwob' in self.browsergym_eval_env:
|
||||
import browsergym.miniwob # noqa F401 register miniwob tasks as gym environments
|
||||
nltk.download('punkt_tab')
|
||||
elif 'webarena' in self.browsergym_eval_env:
|
||||
import browsergym.webarena # noqa F401 register webarena tasks as gym environments
|
||||
elif 'miniwob' in self.browsergym_eval_env:
|
||||
import browsergym.miniwob # noqa F401 register miniwob tasks as gym environments
|
||||
else:
|
||||
raise ValueError(
|
||||
f'Unsupported browsergym eval env: {self.browsergym_eval_env}'
|
||||
)
|
||||
env = gym.make(self.browsergym_eval_env, tags_to_mark='all', timeout=100000)
|
||||
else:
|
||||
raise ValueError(
|
||||
f'Unsupported browsergym eval env: {self.browsergym_eval_env}'
|
||||
logger.info('Initializing browser env for open-ended browsing')
|
||||
env = gym.make(
|
||||
'browsergym/openended',
|
||||
task_kwargs={'start_url': 'about:blank', 'goal': 'PLACEHOLDER_GOAL'},
|
||||
wait_for_user_message=False,
|
||||
headless=True,
|
||||
disable_env_checker=True,
|
||||
tags_to_mark='all',
|
||||
)
|
||||
env = gym.make(self.browsergym_eval_env, tags_to_mark='all', timeout=100000)
|
||||
else:
|
||||
env = gym.make(
|
||||
'browsergym/openended',
|
||||
task_kwargs={'start_url': 'about:blank', 'goal': 'PLACEHOLDER_GOAL'},
|
||||
wait_for_user_message=False,
|
||||
headless=True,
|
||||
disable_env_checker=True,
|
||||
tags_to_mark='all',
|
||||
)
|
||||
obs, info = env.reset()
|
||||
|
||||
logger.info('Successfully called env.reset')
|
||||
# EVAL ONLY: save the goal into file for evaluation
|
||||
self.eval_goal = None
|
||||
self.goal_image_urls = []
|
||||
self.eval_rewards: list[float] = []
|
||||
if self.eval_mode:
|
||||
self.eval_goal = obs['goal']
|
||||
if 'goal_object' in obs:
|
||||
if len(obs['goal_object']) > 0:
|
||||
self.eval_goal = obs['goal_object'][0]['text']
|
||||
for message in obs['goal_object']:
|
||||
if message['type'] == 'image_url':
|
||||
image_src = message['image_url']
|
||||
if isinstance(image_src, dict):
|
||||
image_src = image_src['url']
|
||||
self.goal_image_urls.append(image_src)
|
||||
logger.debug(f'Browsing goal: {self.eval_goal}')
|
||||
logger.info('Browser env started.')
|
||||
# Log successful environment creation
|
||||
logger.info('Successfully created browser environment')
|
||||
|
||||
# Reset environment and get initial observation
|
||||
obs, info = env.reset()
|
||||
logger.info('Successfully called env.reset')
|
||||
|
||||
# EVAL ONLY: save the goal into file for evaluation
|
||||
self.eval_goal = None
|
||||
self.goal_image_urls = []
|
||||
self.eval_rewards: list[float] = []
|
||||
|
||||
if self.eval_mode:
|
||||
self.eval_goal = obs['goal']
|
||||
if 'goal_object' in obs:
|
||||
if len(obs['goal_object']) > 0:
|
||||
self.eval_goal = obs['goal_object'][0]['text']
|
||||
for message in obs['goal_object']:
|
||||
if message['type'] == 'image_url':
|
||||
image_src = message['image_url']
|
||||
if isinstance(image_src, dict):
|
||||
image_src = image_src['url']
|
||||
self.goal_image_urls.append(image_src)
|
||||
logger.debug(f'Browsing goal: {self.eval_goal}')
|
||||
logger.info('Browser env started.')
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to initialize browser environment: {e}')
|
||||
# Ensure we close the pipe on our side before exiting
|
||||
try:
|
||||
self.browser_side.send(('ERROR', str(e)))
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
|
||||
while should_continue():
|
||||
try:
|
||||
|
||||
140
tests/unit/test_browser_env.py
Normal file
140
tests/unit/test_browser_env.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""Tests for browser environment initialization."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import tenacity
|
||||
|
||||
from openhands.core.exceptions import BrowserInitException
|
||||
from openhands.runtime.browser.browser_env import BrowserEnv
|
||||
|
||||
|
||||
def test_browser_init_success():
    """Check that BrowserEnv constructs when the process is alive and replies.

    ``multiprocessing.Process`` and ``multiprocessing.Pipe`` are both patched,
    so the test works purely against mocks.
    """
    # Stand-in process that always reports itself as running.
    live_process = MagicMock()
    live_process.is_alive.return_value = True

    # A single mock plays both pipe ends: data is always ready and every
    # recv() yields the ('ALIVE', None) reply.
    pipe_end = MagicMock()
    pipe_end.poll.return_value = True
    pipe_end.recv.return_value = ('ALIVE', None)

    process_patch = patch('multiprocessing.Process', return_value=live_process)
    pipe_patch = patch('multiprocessing.Pipe', return_value=(pipe_end, pipe_end))
    with process_patch, pipe_patch:
        browser = BrowserEnv()
        assert browser.process.is_alive()
        browser.close()
|
||||
|
||||
|
||||
def test_browser_init_process_failure():
    """Check the reported error when the process is dead right after start.

    Construction is expected to end in ``tenacity.RetryError`` whose last
    attempt failed with a ``BrowserInitException`` naming the exit code.
    """
    # Stand-in process that is not alive and carries a crash exit code.
    dead_process = MagicMock()
    dead_process.is_alive.return_value = False
    dead_process.exitcode = -11  # Segmentation fault

    process_patch = patch('multiprocessing.Process', return_value=dead_process)
    pipe_patch = patch(
        'multiprocessing.Pipe', return_value=(MagicMock(), MagicMock())
    )
    with process_patch, pipe_patch:
        with pytest.raises(tenacity.RetryError) as exc_info:
            BrowserEnv()

    # Unwrap the exception raised by the final attempt.
    last_error = exc_info.value.last_attempt.exception()
    assert isinstance(last_error, BrowserInitException)
    assert 'exit code -11' in str(last_error)
|
||||
|
||||
|
||||
def test_browser_init_communication_failure():
    """Check the reported error when the process runs but never answers.

    Construction is expected to end in ``tenacity.RetryError`` whose last
    attempt failed with a ``BrowserInitException`` saying 'not responding'.
    """
    # Stand-in process that looks healthy.
    silent_process = MagicMock()
    silent_process.is_alive.return_value = True

    # Pipe mock whose poll() never reports data, i.e. no reply ever arrives.
    quiet_pipe = MagicMock()
    quiet_pipe.poll.return_value = False

    process_patch = patch('multiprocessing.Process', return_value=silent_process)
    pipe_patch = patch('multiprocessing.Pipe', return_value=(quiet_pipe, quiet_pipe))
    with process_patch, pipe_patch:
        with pytest.raises(tenacity.RetryError) as exc_info:
            BrowserEnv()

    # Unwrap the exception raised by the final attempt.
    last_error = exc_info.value.last_attempt.exception()
    assert isinstance(last_error, BrowserInitException)
    assert 'not responding' in str(last_error)
|
||||
|
||||
|
||||
def test_browser_init_error_handling():
    """Check that an OSError from ``Process.start`` reaches the caller.

    The test expects the ``OSError`` itself, not a ``tenacity.RetryError``.
    """
    # Stand-in process whose start() call blows up.
    broken_process = MagicMock()
    broken_process.start.side_effect = OSError('Failed to start process')

    process_patch = patch('multiprocessing.Process', return_value=broken_process)
    pipe_patch = patch(
        'multiprocessing.Pipe', return_value=(MagicMock(), MagicMock())
    )
    with process_patch, pipe_patch:
        with pytest.raises(OSError) as exc_info:
            BrowserEnv()

    assert 'Failed to start process' in str(exc_info.value)
|
||||
|
||||
|
||||
def test_browser_init_retry():
    """Check the final error when every start attempt yields a dead process.

    Five distinct failing process mocks are queued, one per ``Process(...)``
    call. Construction is expected to end in ``tenacity.RetryError`` wrapping
    a ``BrowserInitException`` that names exit code 1.
    """
    # All 5 attempts fail: each queued process is dead with exit code 1.
    failing_processes = [
        MagicMock(exitcode=1, **{'is_alive.return_value': False})
        for _ in range(5)
    ]

    # Pipe mock that never reports a response.
    quiet_pipe = MagicMock()
    quiet_pipe.poll.return_value = False

    process_patch = patch('multiprocessing.Process', side_effect=failing_processes)
    pipe_patch = patch('multiprocessing.Pipe', return_value=(quiet_pipe, quiet_pipe))
    with process_patch, pipe_patch:
        with pytest.raises(tenacity.RetryError) as exc_info:
            BrowserEnv()

    # Unwrap the exception raised by the final attempt.
    last_error = exc_info.value.last_attempt.exception()
    assert isinstance(last_error, BrowserInitException)
    assert 'exit code 1' in str(last_error)
|
||||
|
||||
|
||||
def test_browser_close_cleanup():
    """Check that ``close()`` closes the pipe and joins without terminating.

    The process mock reports alive twice and then dead, modelling a process
    that exits once it has been joined.
    """
    worker = MagicMock()
    # Alive then dead after join
    worker.is_alive.side_effect = [True, True, False]

    # One mock plays both pipe ends, so close() calls on either side are
    # counted on the same object.
    shared_pipe = MagicMock()
    shared_pipe.poll.return_value = True
    shared_pipe.recv.return_value = ('ALIVE', None)

    process_patch = patch('multiprocessing.Process', return_value=worker)
    pipe_patch = patch('multiprocessing.Pipe', return_value=(shared_pipe, shared_pipe))
    with process_patch, pipe_patch:
        browser = BrowserEnv()
        browser.close()

    # Verify cleanup
    assert shared_pipe.close.call_count == 2  # Both sides of pipe closed
    worker.join.assert_called()
    worker.terminate.assert_not_called()  # Should not need force
|
||||
Reference in New Issue
Block a user