Add selected_repo to command line (#6949)

2026-01-09 14:57:59 -05:00 · 2025-02-26 20:42:59 +01:00
parent b38039e626
commit 4f98bce6df
29 changed files with 102 additions and 66 deletions
--- a/.github/workflows/integration-runner.yml
+++ b/.github/workflows/integration-runner.yml
@@ -40,6 +40,11 @@ jobs:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22.x'
      - name: Comment on PR if 'integration-test' label is present
        if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
        uses: KeisukeYamashita/create-comment@v1
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -24,7 +24,6 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
 from openhands.utils.async_utils import call_async_from_sync
 game = None
@@ -122,7 +121,6 @@ def process_instance(
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    state: State | None = asyncio.run(
        run_controller(
--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 def get_config(
@@ -211,7 +210,6 @@ def process_instance(
    # =============================================
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance=instance)
--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 # Configure visibility of unit tests to the Agent.
 USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -204,7 +203,6 @@ def process_instance(
    # =============================================
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance=instance)
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': functools.partial(
@@ -275,7 +274,6 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 def codeact_user_response(state: State) -> str:
@@ -400,7 +399,6 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/commit0_bench/run_infer.py
+++ b/evaluation/benchmarks/commit0_bench/run_infer.py
@@ -35,7 +35,6 @@ from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -395,7 +394,6 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    try:
        initialize_runtime(runtime, instance)
--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@@ -34,7 +34,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 EVALUATION_LLM = 'gpt-4-1106-preview'
@@ -282,7 +281,6 @@ def process_instance(
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance.data_files)
    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -31,7 +31,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
@@ -149,7 +148,6 @@ def process_instance(
    logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -26,7 +26,6 @@ from openhands.core.config import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
 from openhands.utils.async_utils import call_async_from_sync
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -83,7 +82,6 @@ def process_instance(
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    state: State | None = asyncio.run(
        run_controller(
            config=config,
--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -49,7 +49,6 @@ from openhands.events.action import (
    MessageAction,
 )
 from openhands.events.observation import Observation
 from openhands.utils.async_utils import call_async_from_sync
 ACTION_FORMAT = """
 <<FINAL_ANSWER||
@@ -215,7 +214,6 @@ Ok now its time to start solving the question. Good luck!
 """
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    state: State | None = asyncio.run(
        run_controller(
            config=config,
--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@@ -39,7 +39,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 IMPORT_HELPER = {
    'python': [
@@ -233,7 +232,6 @@ def process_instance(
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    state: State | None = asyncio.run(
        run_controller(
--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -31,7 +31,6 @@ from openhands.events.action import (
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -207,7 +206,6 @@ def process_instance(
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/miniwob/run_infer.py
+++ b/evaluation/benchmarks/miniwob/run_infer.py
@@ -41,7 +41,6 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
 from openhands.utils.async_utils import call_async_from_sync
 SUPPORTED_AGENT_CLS = {'BrowsingAgent', 'CodeActAgent'}
@@ -146,7 +145,6 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    task_str, obs = initialize_runtime(runtime)
    task_str += (
--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -35,7 +35,6 @@ from openhands.events.action import (
 )
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -185,7 +184,6 @@ def process_instance(
    )
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime)
    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -43,7 +43,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 config = load_app_config()
@@ -235,7 +234,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
    instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Run the agent
--- a/evaluation/benchmarks/scienceagentbench/run_infer.py
+++ b/evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -29,7 +29,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -196,7 +195,6 @@ If the program uses some packages that are incompatible, please figure out alter
 """
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime, instance)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -40,7 +40,6 @@ from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation, ErrorObservation
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 from openhands.utils.shutdown_listener import sleep_if_should_continue
 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
@@ -422,7 +421,6 @@ def process_instance(
            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
        )
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    try:
        initialize_runtime(runtime, instance)
--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -28,7 +28,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 def get_config(
@@ -276,7 +275,6 @@ if __name__ == '__main__':
        args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
    )
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    init_task_env(runtime, args.server_hostname, env_llm_config)
--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -27,7 +27,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
 from openhands.events.observation import CmdOutputObservation
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
@@ -105,7 +104,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
    logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    initialize_runtime(runtime)
    # Here's how you can run the agent (similar to the `main` function) and get the final task state
--- a/evaluation/benchmarks/visualwebarena/run_infer.py
+++ b/evaluation/benchmarks/visualwebarena/run_infer.py
@@ -37,7 +37,6 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
 from openhands.utils.async_utils import call_async_from_sync
 SUPPORTED_AGENT_CLS = {'VisualBrowsingAgent'}
 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -160,7 +159,6 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    task_str, goal_image_urls = initialize_runtime(runtime)
    initial_user_action = MessageAction(content=task_str, image_urls=goal_image_urls)
    state: State | None = asyncio.run(
--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@@ -36,7 +36,6 @@ from openhands.runtime.browser.browser_env import (
    BROWSER_EVAL_GET_GOAL_ACTION,
    BROWSER_EVAL_GET_REWARDS_ACTION,
 )
 from openhands.utils.async_utils import call_async_from_sync
 SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@@ -145,7 +144,6 @@ def process_instance(
        logger.info(f'Starting evaluation for instance {env_id}.')
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    task_str = initialize_runtime(runtime)
    state: State | None = asyncio.run(
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -30,7 +30,6 @@ from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import MessageAction
 from openhands.events.serialization.event import event_to_dict
 from openhands.runtime.base import Runtime
 from openhands.utils.async_utils import call_async_from_sync
 FAKE_RESPONSES = {
    'CodeActAgent': fake_user_response,
@@ -109,7 +108,6 @@ def process_instance(
    # create sandbox and run the agent
    # =============================================
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    try:
        test_class.initialize_runtime(runtime)
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -102,9 +102,16 @@ async def main(loop: asyncio.AbstractEventLoop):
    sid = str(uuid4())
    display_message(f'Session ID: {sid}')
-    runtime = create_runtime(config, sid=sid, headless_mode=True)
+    agent = create_agent(config)
-    await runtime.connect()
+
-    agent = create_agent(runtime, config)
+    runtime = create_runtime(
        config,
        sid=sid,
        headless_mode=True,
        agent=agent,
        selected_repository=config.sandbox.selected_repo,
    )
    controller, _ = create_controller(agent, runtime, config)
    event_stream = runtime.event_stream
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -71,5 +71,6 @@ class SandboxConfig(BaseModel):
    remote_runtime_resource_factor: int = Field(default=1)
    enable_gpu: bool = Field(default=False)
    docker_runtime_kwargs: str | None = Field(default=None)
    selected_repo: str | None = Field(default=None)
    model_config = {'extra': 'forbid'}
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -475,9 +475,9 @@ def get_parser() -> argparse.ArgumentParser:
    parser.add_argument(
        '-n',
        '--name',
-        default='',
+        help='Session name',
        type=str,
-        help='Name for the session',
+        default='',
    )
    parser.add_argument(
        '--eval-ids',
@@ -487,8 +487,15 @@ def get_parser() -> argparse.ArgumentParser:
    )
    parser.add_argument(
        '--no-auto-continue',
        help='Disable auto-continue responses in headless mode (i.e. headless will read from stdin instead of auto-continuing)',
        action='store_true',
-        help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
+        default=False,
    )
    parser.add_argument(
        '--selected-repo',
        help='GitHub repository to clone (format: owner/repo)',
        type=str,
        default=None,
    )
    return parser
@@ -555,4 +562,8 @@ def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
    if args.max_budget_per_task is not None:
        config.max_budget_per_task = args.max_budget_per_task
    # Store the selected repository in the config for use by the CLI and main.py
    if args.selected_repo is not None:
        config.sandbox.selected_repo = args.selected_repo
    return config
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -88,15 +88,20 @@ async def run_controller(
    """
    sid = sid or generate_sid(config)
    if agent is None:
        agent = create_agent(config)
    if runtime is None:
-        runtime = create_runtime(config, sid=sid, headless_mode=headless_mode)
+        runtime = create_runtime(
-        await runtime.connect()
+            config,
            sid=sid,
            headless_mode=headless_mode,
            agent=agent,
            selected_repository=config.sandbox.selected_repo,
        )
    event_stream = runtime.event_stream
    if agent is None:
        agent = create_agent(runtime, config)
    replay_events: list[Event] | None = None
    if config.replay_trajectory_path:
        logger.info('Trajectory replay is enabled')
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -1,7 +1,10 @@
 import hashlib
 import os
 import uuid
 from typing import Tuple, Type
 from pydantic import SecretStr
 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
 from openhands.controller import AgentController
 from openhands.controller.agent import Agent
@@ -13,16 +16,21 @@ from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.events.event import Event
 from openhands.llm.llm import LLM
 from openhands.microagent.microagent import BaseMicroAgent
 from openhands.runtime import get_runtime_cls
 from openhands.runtime.base import Runtime
 from openhands.security import SecurityAnalyzer, options
 from openhands.storage import get_file_store
 from openhands.utils.async_utils import call_async_from_sync
 def create_runtime(
    config: AppConfig,
    sid: str | None = None,
    headless_mode: bool = True,
    agent: Agent | None = None,
    selected_repository: str | None = None,
    github_token: SecretStr | None = None,
 ) -> Runtime:
    """Create a runtime for the agent to run on.
@@ -31,6 +39,8 @@ def create_runtime(
        Setting it to an incompatible value will cause unexpected behavior on RemoteRuntime.
    headless_mode: Whether the agent is run in headless mode. `create_runtime` is typically called within evaluation scripts,
        where we don't want to have the VSCode UI open, so it defaults to True.
    selected_repository: (optional) The GitHub repository to use.
    github_token: (optional) The GitHub token to use.
    """
    # if sid is provided on the command line, use it as the name of the event stream
    # otherwise generate it on the basis of the configured jwt_secret
@@ -41,8 +51,17 @@ def create_runtime(
    file_store = get_file_store(config.file_store, config.file_store_path)
    event_stream = EventStream(session_id, file_store)
    # set up the security analyzer
    if config.security.security_analyzer:
        options.SecurityAnalyzers.get(
            config.security.security_analyzer, SecurityAnalyzer
        )(event_stream)
    # agent class
-    agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
+    if agent:
        agent_cls = type(agent)
    else:
        agent_cls = openhands.agenthub.Agent.get_cls(config.default_agent)
    # runtime and tools
    runtime_cls = get_runtime_cls(config.runtime)
@@ -55,10 +74,38 @@ def create_runtime(
        headless_mode=headless_mode,
    )
    call_async_from_sync(runtime.connect)
    # clone selected repository if provided
    repo_directory = None
    github_token = (
        SecretStr(os.environ.get('GITHUB_TOKEN')) if not github_token else github_token
    )
    if selected_repository and github_token:
        logger.debug(f'Selected repository {selected_repository}.')
        repo_directory = runtime.clone_repo(
            github_token,
            selected_repository,
            None,
        )
    # load microagents from selected repository
    if agent and agent.prompt_manager and selected_repository and repo_directory:
        agent.prompt_manager.set_runtime_info(runtime)
        microagents: list[BaseMicroAgent] = runtime.get_microagents_from_selected_repo(
            selected_repository
        )
        agent.prompt_manager.load_microagents(microagents)
        agent.prompt_manager.set_repository_info(selected_repository, repo_directory)
    logger.debug(
        f'Runtime initialized with plugins: {[plugin.name for plugin in runtime.plugins]}'
    )
    return runtime
-def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
+def create_agent(config: AppConfig) -> Agent:
    agent_cls: Type[Agent] = Agent.get_cls(config.default_agent)
    agent_config = config.get_agent_config(config.default_agent)
    llm_config = config.get_llm_config_from_agent(config.default_agent)
@@ -66,14 +113,6 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:
        llm=LLM(config=llm_config),
        config=agent_config,
    )
    if agent.prompt_manager:
        microagents = runtime.get_microagents_from_selected_repo(None)
        agent.prompt_manager.load_microagents(microagents)
    if config.security.security_analyzer:
        options.SecurityAnalyzers.get(
            config.security.security_analyzer, SecurityAnalyzer
        )(runtime.event_stream)
    return agent
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -20,6 +20,7 @@ def test_parser_default_values():
    assert args.llm_config is None
    assert args.name == ''
    assert not args.no_auto_continue
    assert args.selected_repo is None
 def test_parser_custom_values():
@@ -52,6 +53,8 @@ def test_parser_custom_values():
            '-n',
            'test_session',
            '--no-auto-continue',
            '--selected-repo',
            'owner/repo',
        ]
    )
@@ -69,6 +72,7 @@ def test_parser_custom_values():
    assert args.name == 'test_session'
    assert args.no_auto_continue
    assert args.version
    assert args.selected_repo == 'owner/repo'
 def test_parser_file_overrides_task():
@@ -132,10 +136,18 @@ def test_help_message(capsys):
        '-n NAME, --name NAME',
        '--config-file CONFIG_FILE',
        '--no-auto-continue',
        '--selected-repo SELECTED_REPO',
    ]
    for element in expected_elements:
        assert element in help_output, f"Expected '{element}' to be in the help message"
    option_count = help_output.count('  -')
-    assert option_count == 18, f'Expected 18 options, found {option_count}'
+    assert option_count == 19, f'Expected 19 options, found {option_count}'
 def test_selected_repo_format():
    """Check that `--selected-repo owner/repo` is parsed into `selected_repo` unchanged."""
    parsed = get_parser().parse_args(['--selected-repo', 'owner/repo'])
    assert parsed.selected_repo == 'owner/repo'