mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-09 14:57:59 -05:00
Add selected_repo to command line (#6949)
This commit is contained in:
5
.github/workflows/integration-runner.yml
vendored
5
.github/workflows/integration-runner.yml
vendored
@@ -40,6 +40,11 @@ jobs:
|
|||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
cache: "poetry"
|
cache: "poetry"
|
||||||
|
|
||||||
|
- name: Setup Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '22.x'
|
||||||
|
|
||||||
- name: Comment on PR if 'integration-test' label is present
|
- name: Comment on PR if 'integration-test' label is present
|
||||||
if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
|
if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
|
||||||
uses: KeisukeYamashita/create-comment@v1
|
uses: KeisukeYamashita/create-comment@v1
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ from openhands.core.config import (
|
|||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
from openhands.core.main import create_runtime, run_controller
|
from openhands.core.main import create_runtime, run_controller
|
||||||
from openhands.events.action import MessageAction
|
from openhands.events.action import MessageAction
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
game = None
|
game = None
|
||||||
|
|
||||||
@@ -122,7 +121,6 @@ def process_instance(
|
|||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
run_controller(
|
run_controller(
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
|
|
||||||
def get_config(
|
def get_config(
|
||||||
@@ -211,7 +210,6 @@ def process_instance(
|
|||||||
# =============================================
|
# =============================================
|
||||||
|
|
||||||
runtime: Runtime = create_runtime(config)
|
runtime: Runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
|
|
||||||
initialize_runtime(runtime, instance=instance)
|
initialize_runtime(runtime, instance=instance)
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
# Configure visibility of unit tests to the Agent.
|
# Configure visibility of unit tests to the Agent.
|
||||||
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
|
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
|
||||||
@@ -204,7 +203,6 @@ def process_instance(
|
|||||||
# =============================================
|
# =============================================
|
||||||
|
|
||||||
runtime: Runtime = create_runtime(config)
|
runtime: Runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
|
|
||||||
initialize_runtime(runtime, instance=instance)
|
initialize_runtime(runtime, instance=instance)
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
'CodeActAgent': functools.partial(
|
'CodeActAgent': functools.partial(
|
||||||
@@ -275,7 +274,6 @@ def process_instance(
|
|||||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
|
|
||||||
def codeact_user_response(state: State) -> str:
|
def codeact_user_response(state: State) -> str:
|
||||||
@@ -400,7 +399,6 @@ def process_instance(
|
|||||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ from openhands.events.action import CmdRunAction, MessageAction
|
|||||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||||
from openhands.events.serialization.event import event_to_dict
|
from openhands.events.serialization.event import event_to_dict
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||||
|
|
||||||
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
||||||
@@ -395,7 +394,6 @@ def process_instance(
|
|||||||
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
try:
|
try:
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
EVALUATION_LLM = 'gpt-4-1106-preview'
|
EVALUATION_LLM = 'gpt-4-1106-preview'
|
||||||
|
|
||||||
@@ -282,7 +281,6 @@ def process_instance(
|
|||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance.data_files)
|
initialize_runtime(runtime, instance.data_files)
|
||||||
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
|
DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
|
||||||
|
|
||||||
@@ -149,7 +148,6 @@ def process_instance(
|
|||||||
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ from openhands.core.config import (
|
|||||||
from openhands.core.logger import openhands_logger as logger
|
from openhands.core.logger import openhands_logger as logger
|
||||||
from openhands.core.main import create_runtime, run_controller
|
from openhands.core.main import create_runtime, run_controller
|
||||||
from openhands.events.action import MessageAction
|
from openhands.events.action import MessageAction
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
'CodeActAgent': codeact_user_response,
|
'CodeActAgent': codeact_user_response,
|
||||||
@@ -83,7 +82,6 @@ def process_instance(
|
|||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
run_controller(
|
run_controller(
|
||||||
config=config,
|
config=config,
|
||||||
|
|||||||
@@ -49,7 +49,6 @@ from openhands.events.action import (
|
|||||||
MessageAction,
|
MessageAction,
|
||||||
)
|
)
|
||||||
from openhands.events.observation import Observation
|
from openhands.events.observation import Observation
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
ACTION_FORMAT = """
|
ACTION_FORMAT = """
|
||||||
<<FINAL_ANSWER||
|
<<FINAL_ANSWER||
|
||||||
@@ -215,7 +214,6 @@ Ok now its time to start solving the question. Good luck!
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
run_controller(
|
run_controller(
|
||||||
config=config,
|
config=config,
|
||||||
|
|||||||
@@ -39,7 +39,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
IMPORT_HELPER = {
|
IMPORT_HELPER = {
|
||||||
'python': [
|
'python': [
|
||||||
@@ -233,7 +232,6 @@ def process_instance(
|
|||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
run_controller(
|
run_controller(
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ from openhands.events.action import (
|
|||||||
)
|
)
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
'CodeActAgent': codeact_user_response,
|
'CodeActAgent': codeact_user_response,
|
||||||
@@ -207,7 +206,6 @@ def process_instance(
|
|||||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -41,7 +41,6 @@ from openhands.runtime.browser.browser_env import (
|
|||||||
BROWSER_EVAL_GET_GOAL_ACTION,
|
BROWSER_EVAL_GET_GOAL_ACTION,
|
||||||
BROWSER_EVAL_GET_REWARDS_ACTION,
|
BROWSER_EVAL_GET_REWARDS_ACTION,
|
||||||
)
|
)
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}
|
SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}
|
||||||
|
|
||||||
@@ -146,7 +145,6 @@ def process_instance(
|
|||||||
logger.info(f'Starting evaluation for instance {env_id}.')
|
logger.info(f'Starting evaluation for instance {env_id}.')
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
task_str, obs = initialize_runtime(runtime)
|
task_str, obs = initialize_runtime(runtime)
|
||||||
|
|
||||||
task_str += (
|
task_str += (
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ from openhands.events.action import (
|
|||||||
)
|
)
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
|
|
||||||
def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
|
def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
|
||||||
@@ -185,7 +184,6 @@ def process_instance(
|
|||||||
)
|
)
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime)
|
initialize_runtime(runtime)
|
||||||
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
config = load_app_config()
|
config = load_app_config()
|
||||||
|
|
||||||
@@ -235,7 +234,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
|||||||
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Run the agent
|
# Run the agent
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
'CodeActAgent': codeact_user_response,
|
'CodeActAgent': codeact_user_response,
|
||||||
@@ -196,7 +195,6 @@ If the program uses some packages that are incompatible, please figure out alter
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -40,7 +40,6 @@ from openhands.events.action import CmdRunAction, MessageAction
|
|||||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||||
from openhands.events.serialization.event import event_to_dict
|
from openhands.events.serialization.event import event_to_dict
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||||
|
|
||||||
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
||||||
@@ -422,7 +421,6 @@ def process_instance(
|
|||||||
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
||||||
)
|
)
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
initialize_runtime(runtime, instance)
|
initialize_runtime(runtime, instance)
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
|
from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
|
|
||||||
def get_config(
|
def get_config(
|
||||||
@@ -276,7 +275,6 @@ if __name__ == '__main__':
|
|||||||
args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
|
args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
|
||||||
)
|
)
|
||||||
runtime: Runtime = create_runtime(config)
|
runtime: Runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
|
|
||||||
init_task_env(runtime, args.server_hostname, env_llm_config)
|
init_task_env(runtime, args.server_hostname, env_llm_config)
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import CmdRunAction, MessageAction
|
from openhands.events.action import CmdRunAction, MessageAction
|
||||||
from openhands.events.observation import CmdOutputObservation
|
from openhands.events.observation import CmdOutputObservation
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
'CodeActAgent': codeact_user_response,
|
'CodeActAgent': codeact_user_response,
|
||||||
@@ -105,7 +104,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
|||||||
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
initialize_runtime(runtime)
|
initialize_runtime(runtime)
|
||||||
|
|
||||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||||
|
|||||||
@@ -37,7 +37,6 @@ from openhands.runtime.browser.browser_env import (
|
|||||||
BROWSER_EVAL_GET_GOAL_ACTION,
|
BROWSER_EVAL_GET_GOAL_ACTION,
|
||||||
BROWSER_EVAL_GET_REWARDS_ACTION,
|
BROWSER_EVAL_GET_REWARDS_ACTION,
|
||||||
)
|
)
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
|
SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
|
||||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||||
@@ -160,7 +159,6 @@ def process_instance(
|
|||||||
logger.info(f'Starting evaluation for instance {env_id}.')
|
logger.info(f'Starting evaluation for instance {env_id}.')
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
task_str, goal_image_urls = initialize_runtime(runtime)
|
task_str, goal_image_urls = initialize_runtime(runtime)
|
||||||
initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
|
initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
|
|||||||
@@ -36,7 +36,6 @@ from openhands.runtime.browser.browser_env import (
|
|||||||
BROWSER_EVAL_GET_GOAL_ACTION,
|
BROWSER_EVAL_GET_GOAL_ACTION,
|
||||||
BROWSER_EVAL_GET_REWARDS_ACTION,
|
BROWSER_EVAL_GET_REWARDS_ACTION,
|
||||||
)
|
)
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
|
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
|
||||||
|
|
||||||
@@ -145,7 +144,6 @@ def process_instance(
|
|||||||
logger.info(f'Starting evaluation for instance {env_id}.')
|
logger.info(f'Starting evaluation for instance {env_id}.')
|
||||||
|
|
||||||
runtime = create_runtime(config)
|
runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
task_str = initialize_runtime(runtime)
|
task_str = initialize_runtime(runtime)
|
||||||
|
|
||||||
state: State | None = asyncio.run(
|
state: State | None = asyncio.run(
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ from openhands.core.main import create_runtime, run_controller
|
|||||||
from openhands.events.action import MessageAction
|
from openhands.events.action import MessageAction
|
||||||
from openhands.events.serialization.event import event_to_dict
|
from openhands.events.serialization.event import event_to_dict
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.utils.async_utils import call_async_from_sync
|
|
||||||
|
|
||||||
FAKE_RESPONSES = {
|
FAKE_RESPONSES = {
|
||||||
'CodeActAgent': fake_user_response,
|
'CodeActAgent': fake_user_response,
|
||||||
@@ -109,7 +108,6 @@ def process_instance(
|
|||||||
# create sandbox and run the agent
|
# create sandbox and run the agent
|
||||||
# =============================================
|
# =============================================
|
||||||
runtime: Runtime = create_runtime(config)
|
runtime: Runtime = create_runtime(config)
|
||||||
call_async_from_sync(runtime.connect)
|
|
||||||
try:
|
try:
|
||||||
test_class.initialize_runtime(runtime)
|
test_class.initialize_runtime(runtime)
|
||||||
|
|
||||||
|
|||||||
@@ -102,9 +102,16 @@ async def main(loop: asyncio.AbstractEventLoop):
|
|||||||
sid = str(uuid4())
|
sid = str(uuid4())
|
||||||
display_message(f'Session ID: {sid}')
|
display_message(f'Session ID: {sid}')
|
||||||
|
|
||||||
runtime = create_runtime(config, sid=sid, headless_mode=True)
|
agent = create_agent(config)
|
||||||
await runtime.connect()
|
|
||||||
agent = create_agent(runtime, config)
|
runtime = create_runtime(
|
||||||
|
config,
|
||||||
|
sid=sid,
|
||||||
|
headless_mode=True,
|
||||||
|
agent=agent,
|
||||||
|
selected_repository=config.sandbox.selected_repo,
|
||||||
|
)
|
||||||
|
|
||||||
controller, _ = create_controller(agent, runtime, config)
|
controller, _ = create_controller(agent, runtime, config)
|
||||||
|
|
||||||
event_stream = runtime.event_stream
|
event_stream = runtime.event_stream
|
||||||
|
|||||||
@@ -71,5 +71,6 @@ class SandboxConfig(BaseModel):
|
|||||||
remote_runtime_resource_factor: int = Field(default=1)
|
remote_runtime_resource_factor: int = Field(default=1)
|
||||||
enable_gpu: bool = Field(default=False)
|
enable_gpu: bool = Field(default=False)
|
||||||
docker_runtime_kwargs: str | None = Field(default=None)
|
docker_runtime_kwargs: str | None = Field(default=None)
|
||||||
|
selected_repo: str | None = Field(default=None)
|
||||||
|
|
||||||
model_config = {'extra': 'forbid'}
|
model_config = {'extra': 'forbid'}
|
||||||
|
|||||||
@@ -475,9 +475,9 @@ def get_parser() -> argparse.ArgumentParser:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-n',
|
'-n',
|
||||||
'--name',
|
'--name',
|
||||||
default='',
|
help='Session name',
|
||||||
type=str,
|
type=str,
|
||||||
help='Name for the session',
|
default='',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--eval-ids',
|
'--eval-ids',
|
||||||
@@ -487,8 +487,15 @@ def get_parser() -> argparse.ArgumentParser:
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--no-auto-continue',
|
'--no-auto-continue',
|
||||||
|
help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
|
default=False,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--selected-repo',
|
||||||
|
help='GitHub repository to clone (format: owner/repo)',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
)
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
@@ -555,4 +562,8 @@ def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
|
|||||||
if args.max_budget_per_task is not None:
|
if args.max_budget_per_task is not None:
|
||||||
config.max_budget_per_task = args.max_budget_per_task
|
config.max_budget_per_task = args.max_budget_per_task
|
||||||
|
|
||||||
|
# Read selected repository in config for use by CLI and main.py
|
||||||
|
if args.selected_repo is not None:
|
||||||
|
config.sandbox.selected_repo = args.selected_repo
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|||||||
@@ -88,15 +88,20 @@ async def run_controller(
|
|||||||
"""
|
"""
|
||||||
sid = sid or generate_sid(config)
|
sid = sid or generate_sid(config)
|
||||||
|
|
||||||
|
if agent is None:
|
||||||
|
agent = create_agent(config)
|
||||||
|
|
||||||
if runtime is None:
|
if runtime is None:
|
||||||
runtime = create_runtime(config, sid=sid, headless_mode=headless_mode)
|
runtime = create_runtime(
|
||||||
await runtime.connect()
|
config,
|
||||||
|
sid=sid,
|
||||||
|
headless_mode=headless_mode,
|
||||||
|
agent=agent,
|
||||||
|
selected_repository=config.sandbox.selected_repo,
|
||||||
|
)
|
||||||
|
|
||||||
event_stream = runtime.event_stream
|
event_stream = runtime.event_stream
|
||||||
|
|
||||||
if agent is None:
|
|
||||||
agent = create_agent(runtime, config)
|
|
||||||
|
|
||||||
replay_events: list[Event] | None = None
|
replay_events: list[Event] | None = None
|
||||||
if config.replay_trajectory_path:
|
if config.replay_trajectory_path:
|
||||||
logger.info('Trajectory replay is enabled')
|
logger.info('Trajectory replay is enabled')
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Tuple, Type
|
from typing import Tuple, Type
|
||||||
|
|
||||||
|
from pydantic import SecretStr
|
||||||
|
|
||||||
import openhands.agenthub # noqa F401 (we import this to get the agents registered)
|
import openhands.agenthub # noqa F401 (we import this to get the agents registered)
|
||||||
from openhands.controller import AgentController
|
from openhands.controller import AgentController
|
||||||
from openhands.controller.agent import Agent
|
from openhands.controller.agent import Agent
|
||||||
@@ -13,16 +16,21 @@ from openhands.core.logger import openhands_logger as logger
|
|||||||
from openhands.events import EventStream
|
from openhands.events import EventStream
|
||||||
from openhands.events.event import Event
|
from openhands.events.event import Event
|
||||||
from openhands.llm.llm import LLM
|
from openhands.llm.llm import LLM
|
||||||
|
from openhands.microagent.microagent import BaseMicroAgent
|
||||||
from openhands.runtime import get_runtime_cls
|
from openhands.runtime import get_runtime_cls
|
||||||
from openhands.runtime.base import Runtime
|
from openhands.runtime.base import Runtime
|
||||||
from openhands.security import SecurityAnalyzer, options
|
from openhands.security import SecurityAnalyzer, options
|
||||||
from openhands.storage import get_file_store
|
from openhands.storage import get_file_store
|
||||||
|
from openhands.utils.async_utils import call_async_from_sync
|
||||||
|
|
||||||
|
|
||||||
def create_runtime(
|
def create_runtime(
|
||||||
config: AppConfig,
|
config: AppConfig,
|
||||||
sid: str | None = None,
|
sid: str | None = None,
|
||||||
headless_mode: bool = True,
|
headless_mode: bool = True,
|
||||||
|
agent: Agent | None = None,
|
||||||
|
selected_repository: str | None = None,
|
||||||
|
github_token: SecretStr | None = None,
|
||||||
) -> Runtime:
|
) -> Runtime:
|
||||||
"""Create a runtime for the agent to run on.
|
"""Create a runtime for the agent to run on.
|
||||||
|
|
||||||
@@ -31,6 +39,8 @@ def create_runtime(
|
|||||||
Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
|
Set it to incompatible value will cause unexpected behavior on RemoteRuntime.
|
||||||
headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
|
headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
|
||||||
where we don't want to have the VSCode UI open, so it defaults to True.
|
where we don't want to have the VSCode UI open, so it defaults to True.
|
||||||
|
selected_repository: (optional) The GitHub repository to use.
|
||||||
|
github_token: (optional) The GitHub token to use.
|
||||||
"""
|
"""
|
||||||
# if sid is provided on the command line, use it as the name of the event stream
|
# if sid is provided on the command line, use it as the name of the event stream
|
||||||
# otherwise generate it on the basis of the configured jwt_secret
|
# otherwise generate it on the basis of the configured jwt_secret
|
||||||
@@ -41,8 +51,17 @@ def create_runtime(
|
|||||||
file_store = get_file_store(config.file_store, config.file_store_path)
|
file_store = get_file_store(config.file_store, config.file_store_path)
|
||||||
event_stream = EventStream(session_id, file_store)
|
event_stream = EventStream(session_id, file_store)
|
||||||
|
|
||||||
|
# set up the security analyzer
|
||||||
|
if config.security.security_analyzer:
|
||||||
|
options.SecurityAnalyzers.get(
|
||||||
|
config.security.security_analyzer, SecurityAnalyzer
|
||||||
|
)(event_stream)
|
||||||
|
|
||||||
# agent class
|
# agent class
|
||||||
agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
|
if agent:
|
||||||
|
agent_cls = type(agent)
|
||||||
|
else:
|
||||||
|
agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
|
||||||
|
|
||||||
# runtime and tools
|
# runtime and tools
|
||||||
runtime_cls = get_runtime_cls(config.runtime)
|
runtime_cls = get_runtime_cls(config.runtime)
|
||||||
@@ -55,10 +74,38 @@ def create_runtime(
|
|||||||
headless_mode=headless_mode,
|
headless_mode=headless_mode,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
call_async_from_sync(runtime.connect)
|
||||||
|
|
||||||
|
# clone selected repository if provided
|
||||||
|
repo_directory = None
|
||||||
|
github_token = (
|
||||||
|
SecretStr(os.environ.get('GITHUB_TOKEN')) if not github_token else github_token
|
||||||
|
)
|
||||||
|
if selected_repository and github_token:
|
||||||
|
logger.debug(f'Selected repository {selected_repository}.')
|
||||||
|
repo_directory = runtime.clone_repo(
|
||||||
|
github_token,
|
||||||
|
selected_repository,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# load microagents from selected repository
|
||||||
|
if agent and agent.prompt_manager and selected_repository and repo_directory:
|
||||||
|
agent.prompt_manager.set_runtime_info(runtime)
|
||||||
|
microagents: list[BaseMicroAgent] = runtime.get_microagents_from_selected_repo(
|
||||||
|
selected_repository
|
||||||
|
)
|
||||||
|
agent.prompt_manager.load_microagents(microagents)
|
||||||
|
agent.prompt_manager.set_repository_info(selected_repository, repo_directory)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f'Runtime initialized with plugins: {[plugin.name for plugin in runtime.plugins]}'
|
||||||
|
)
|
||||||
|
|
||||||
return runtime
|
return runtime
|
||||||
|
|
||||||
|
|
||||||
def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
|
def create_agent(config: AppConfig) -> Agent:
|
||||||
agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
|
agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
|
||||||
agent_config = config.get_agent_config(config.default_agent)
|
agent_config = config.get_agent_config(config.default_agent)
|
||||||
llm_config = config.get_llm_config_from_agent(config.default_agent)
|
llm_config = config.get_llm_config_from_agent(config.default_agent)
|
||||||
@@ -66,14 +113,6 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
|
|||||||
llm=LLM(config=llm_config),
|
llm=LLM(config=llm_config),
|
||||||
config=agent_config,
|
config=agent_config,
|
||||||
)
|
)
|
||||||
if agent.prompt_manager:
|
|
||||||
microagents = runtime.get_microagents_from_selected_repo(None)
|
|
||||||
agent.prompt_manager.load_microagents(microagents)
|
|
||||||
|
|
||||||
if config.security.security_analyzer:
|
|
||||||
options.SecurityAnalyzers.get(
|
|
||||||
config.security.security_analyzer, SecurityAnalyzer
|
|
||||||
)(runtime.event_stream)
|
|
||||||
|
|
||||||
return agent
|
return agent
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ def test_parser_default_values():
|
|||||||
assert args.llm_config is None
|
assert args.llm_config is None
|
||||||
assert args.name == ''
|
assert args.name == ''
|
||||||
assert not args.no_auto_continue
|
assert not args.no_auto_continue
|
||||||
|
assert args.selected_repo is None
|
||||||
|
|
||||||
|
|
||||||
def test_parser_custom_values():
|
def test_parser_custom_values():
|
||||||
@@ -52,6 +53,8 @@ def test_parser_custom_values():
|
|||||||
'-n',
|
'-n',
|
||||||
'test_session',
|
'test_session',
|
||||||
'--no-auto-continue',
|
'--no-auto-continue',
|
||||||
|
'--selected-repo',
|
||||||
|
'owner/repo',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -69,6 +72,7 @@ def test_parser_custom_values():
|
|||||||
assert args.name == 'test_session'
|
assert args.name == 'test_session'
|
||||||
assert args.no_auto_continue
|
assert args.no_auto_continue
|
||||||
assert args.version
|
assert args.version
|
||||||
|
assert args.selected_repo == 'owner/repo'
|
||||||
|
|
||||||
|
|
||||||
def test_parser_file_overrides_task():
|
def test_parser_file_overrides_task():
|
||||||
@@ -132,10 +136,18 @@ def test_help_message(capsys):
|
|||||||
'-n NAME, --name NAME',
|
'-n NAME, --name NAME',
|
||||||
'--config-file CONFIG_FILE',
|
'--config-file CONFIG_FILE',
|
||||||
'--no-auto-continue',
|
'--no-auto-continue',
|
||||||
|
'--selected-repo SELECTED_REPO',
|
||||||
]
|
]
|
||||||
|
|
||||||
for element in expected_elements:
|
for element in expected_elements:
|
||||||
assert element in help_output, f"Expected '{element}' to be in the help message"
|
assert element in help_output, f"Expected '{element}' to be in the help message"
|
||||||
|
|
||||||
option_count = help_output.count(' -')
|
option_count = help_output.count(' -')
|
||||||
assert option_count == 18, f'Expected 18 options, found {option_count}'
|
assert option_count == 19, f'Expected 19 options, found {option_count}'
|
||||||
|
|
||||||
|
|
||||||
|
def test_selected_repo_format():
|
||||||
|
"""Test that the selected-repo argument accepts owner/repo format."""
|
||||||
|
parser = get_parser()
|
||||||
|
args = parser.parse_args(['--selected-repo', 'owner/repo'])
|
||||||
|
assert args.selected_repo == 'owner/repo'
|
||||||
|
|||||||
Reference in New Issue
Block a user