Feat sandbox skills (#11785)

2026-01-09 14:57:59 -05:00 · 2025-11-20 10:52:13 +00:00
parent 77b565ce08
commit ba883ffeca
10 changed files with 81 additions and 12 deletions
--- a/openhands/app_server/app_conversation/app_conversation_service.py
+++ b/openhands/app_server/app_conversation/app_conversation_service.py
@@ -11,6 +11,7 @@ from openhands.app_server.app_conversation.app_conversation_models import (
    AppConversationStartRequest,
    AppConversationStartTask,
 )
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo
 from openhands.app_server.services.injector import Injector
 from openhands.sdk.utils.models import DiscriminatedUnionMixin
 from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
@@ -91,6 +92,7 @@ class AppConversationService(ABC):
    async def run_setup_scripts(
        self,
        task: AppConversationStartTask,
        sandbox: SandboxInfo,
        workspace: AsyncRemoteWorkspace,
    ) -> AsyncGenerator[AppConversationStartTask, None]:
        """Run the setup scripts for the project and yield status updates"""
--- a/openhands/app_server/app_conversation/app_conversation_service_base.py
+++ b/openhands/app_server/app_conversation/app_conversation_service_base.py
@@ -18,9 +18,12 @@ from openhands.app_server.app_conversation.app_conversation_service import (
 from openhands.app_server.app_conversation.skill_loader import (
    load_global_skills,
    load_repo_skills,
    load_sandbox_skills,
    merge_skills,
 )
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo
 from openhands.app_server.user.user_context import UserContext
 from openhands.sdk import Agent
 from openhands.sdk.context.agent_context import AgentContext
 from openhands.sdk.context.skills import load_user_skills
 from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
@@ -41,6 +44,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
    async def _load_and_merge_all_skills(
        self,
        sandbox: SandboxInfo,
        remote_workspace: AsyncRemoteWorkspace,
        selected_repository: str | None,
        working_dir: str,
@@ -62,6 +66,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
            _logger.debug('Loading skills for V1 conversation')
            # Load skills from all sources
            sandbox_skills = load_sandbox_skills(sandbox)
            global_skills = load_global_skills()
            # Load user skills from ~/.openhands/skills/ directory
            # Uses the SDK's load_user_skills() function which handles loading from
@@ -79,7 +84,9 @@ class AppConversationServiceBase(AppConversationService, ABC):
            )
            # Merge all skills (later lists override earlier ones)
-            all_skills = merge_skills([global_skills, user_skills, repo_skills])
+            all_skills = merge_skills(
                [sandbox_skills, global_skills, user_skills, repo_skills]
            )
            _logger.info(
                f'Loaded {len(all_skills)} total skills: {[s.name for s in all_skills]}'
@@ -121,7 +128,8 @@ class AppConversationServiceBase(AppConversationService, ABC):
    async def _load_skills_and_update_agent(
        self,
-        agent,
+        sandbox: SandboxInfo,
        agent: Agent,
        remote_workspace: AsyncRemoteWorkspace,
        selected_repository: str | None,
        working_dir: str,
@@ -139,7 +147,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
        """
        # Load and merge all skills
        all_skills = await self._load_and_merge_all_skills(
-            remote_workspace, selected_repository, working_dir
+            sandbox, remote_workspace, selected_repository, working_dir
        )
        # Update agent with skills
@@ -150,6 +158,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
    async def run_setup_scripts(
        self,
        task: AppConversationStartTask,
        sandbox: SandboxInfo,
        workspace: AsyncRemoteWorkspace,
    ) -> AsyncGenerator[AppConversationStartTask, None]:
        task.status = AppConversationStartTaskStatus.PREPARING_REPOSITORY
@@ -167,6 +176,7 @@ class AppConversationServiceBase(AppConversationService, ABC):
        task.status = AppConversationStartTaskStatus.SETTING_UP_SKILLS
        yield task
        await self._load_and_merge_all_skills(
            sandbox,
            workspace,
            task.request.selected_repository,
            workspace.working_dir,
--- a/openhands/app_server/app_conversation/live_status_app_conversation_service.py
+++ b/openhands/app_server/app_conversation/live_status_app_conversation_service.py
@@ -218,12 +218,15 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
                api_key=sandbox.session_api_key,
                working_dir=sandbox_spec.working_dir,
            )
-            async for updated_task in self.run_setup_scripts(task, remote_workspace):
+            async for updated_task in self.run_setup_scripts(
                task, sandbox, remote_workspace
            ):
                yield updated_task
            # Build the start request
            start_conversation_request = (
                await self._build_start_conversation_request_for_user(
                    sandbox,
                    request.initial_message,
                    request.git_provider,
                    sandbox_spec.working_dir,
@@ -512,6 +515,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
    async def _build_start_conversation_request_for_user(
        self,
        sandbox: SandboxInfo,
        initial_message: SendMessageRequest | None,
        git_provider: ProviderType | None,
        working_dir: str,
@@ -558,6 +562,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
            api_key=user.llm_api_key,
            usage_id='agent',
        )
        # The agent gets passed initial instructions
        # Select agent based on agent_type
        if agent_type == AgentType.PLAN:
            agent = get_planning_agent(llm=llm)
@@ -573,7 +578,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
        if remote_workspace:
            try:
                agent = await self._load_skills_and_update_agent(
-                    agent, remote_workspace, selected_repository, working_dir
+                    sandbox, agent, remote_workspace, selected_repository, working_dir
                )
            except Exception as e:
                _logger.warning(f'Failed to load skills: {e}', exc_info=True)
--- a/openhands/app_server/app_conversation/skill_loader.py
+++ b/openhands/app_server/app_conversation/skill_loader.py
@@ -13,6 +13,7 @@ import os
 from pathlib import Path
 import openhands
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo
 from openhands.sdk.context.skills import Skill
 from openhands.sdk.workspace.remote.async_remote_workspace import AsyncRemoteWorkspace
@@ -23,6 +24,8 @@ GLOBAL_SKILLS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(openhands.__file__)),
    'skills',
 )
 # Introductory text of the generated 'work_hosts' skill. load_sandbox_skills()
 # uses it as the first line(s) of the skill content and appends one
 # '* <url> (port <port>)' bullet per exposed worker URL after it.
 WORK_HOSTS_SKILL = """The user has access to the following hosts for accessing a web application,
 each of which has a corresponding port:"""
 def _find_and_load_global_skill_files(skill_dir: Path) -> list[Skill]:
@@ -57,6 +60,20 @@ def _find_and_load_global_skill_files(skill_dir: Path) -> list[Skill]:
    return skills
 def load_sandbox_skills(sandbox: SandboxInfo) -> list[Skill]:
    """Build the skills derived from the sandbox itself.

    Currently this is a single 'work_hosts' skill listing every exposed URL
    whose name starts with 'WORKER_', together with its port.

    Args:
        sandbox: Sandbox whose ``exposed_urls`` are inspected.

    Returns:
        A one-element list holding the 'work_hosts' skill, or an empty list
        when the sandbox exposes no URLs or none of them is a worker URL.
    """
    # A missing/empty exposed_urls is treated the same as "no worker URLs".
    worker_urls = [
        exposed
        for exposed in (sandbox.exposed_urls or [])
        if exposed.name.startswith('WORKER_')
    ]
    if not worker_urls:
        return []

    # Header text first, then one bullet per worker host.
    host_lines = [
        f'* {exposed.url} (port {exposed.port})' for exposed in worker_urls
    ]
    content = '\n'.join([WORK_HOSTS_SKILL, *host_lines])
    return [Skill(name='work_hosts', content=content, trigger=None)]
 def load_global_skills() -> list[Skill]:
    """Load global skills from OpenHands/skills/ directory.
--- a/openhands/app_server/sandbox/docker_sandbox_service.py
+++ b/openhands/app_server/sandbox/docker_sandbox_service.py
@@ -162,6 +162,7 @@ class DockerSandboxService(SandboxService):
                                ExposedUrl(
                                    name=exposed_port.name,
                                    url=url,
                                    port=host_port,
                                )
                            )
--- a/openhands/app_server/sandbox/process_sandbox_service.py
+++ b/openhands/app_server/sandbox/process_sandbox_service.py
@@ -208,6 +208,7 @@ class ProcessSandboxService(SandboxService):
                        ExposedUrl(
                            name=AGENT_SERVER,
                            url=f'http://localhost:{process_info.port}',
                            port=process_info.port,
                        ),
                    ]
                    session_api_key = process_info.session_api_key
--- a/openhands/app_server/sandbox/remote_sandbox_service.py
+++ b/openhands/app_server/sandbox/remote_sandbox_service.py
@@ -64,6 +64,10 @@ STATUS_MAPPING = {
    'starting': SandboxStatus.STARTING,
    'error': SandboxStatus.ERROR,
 }
 # Port numbers recorded on the ExposedUrl entries that RemoteSandboxService
 # builds for a runtime (agent server, VSCode, and the two worker hosts).
 # NOTE(review): presumably these are the ports the services listen on inside
 # the remote runtime — confirm against the runtime image configuration.
 AGENT_SERVER_PORT = 60000
 VSCODE_PORT = 60001
 WORKER_1_PORT = 12000
 WORKER_2_PORT = 12001
 class StoredRemoteSandbox(Base):  # type: ignore
@@ -138,17 +142,29 @@ class RemoteSandboxService(SandboxService):
                exposed_urls = []
                url = runtime.get('url', None)
                if url:
-                    exposed_urls.append(ExposedUrl(name=AGENT_SERVER, url=url))
+                    exposed_urls.append(
                        ExposedUrl(name=AGENT_SERVER, url=url, port=AGENT_SERVER_PORT)
                    )
                    vscode_url = (
                        _build_service_url(url, 'vscode')
                        + f'/?tkn={session_api_key}&folder=%2Fworkspace%2Fproject'
                    )
                    exposed_urls.append(ExposedUrl(name=VSCODE, url=vscode_url))
                    exposed_urls.append(
-                        ExposedUrl(name=WORKER_1, url=_build_service_url(url, 'work-1'))
+                        ExposedUrl(name=VSCODE, url=vscode_url, port=VSCODE_PORT)
                    )
                    exposed_urls.append(
-                        ExposedUrl(name=WORKER_2, url=_build_service_url(url, 'work-2'))
+                        ExposedUrl(
                            name=WORKER_1,
                            url=_build_service_url(url, 'work-1'),
                            port=WORKER_1_PORT,
                        )
                    )
                    exposed_urls.append(
                        ExposedUrl(
                            name=WORKER_2,
                            url=_build_service_url(url, 'work-2'),
                            port=WORKER_2_PORT,
                        )
                    )
            else:
                exposed_urls = None
--- a/openhands/app_server/sandbox/sandbox_models.py
+++ b/openhands/app_server/sandbox/sandbox_models.py
@@ -20,6 +20,7 @@ class ExposedUrl(BaseModel):
    name: str
    url: str
    port: int
 # Standard names
--- a/tests/unit/experiments/test_experiment_manager.py
+++ b/tests/unit/experiments/test_experiment_manager.py
@@ -9,6 +9,7 @@ import pytest
 from openhands.app_server.app_conversation.live_status_app_conversation_service import (
    LiveStatusAppConversationService,
 )
 from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
 from openhands.experiments.experiment_manager import ExperimentManager
 from openhands.sdk import Agent
 from openhands.sdk.llm import LLM
@@ -191,6 +192,14 @@ class TestExperimentManagerIntegration:
            access_token_hard_timeout=None,
        )
        sandbox = SandboxInfo(
            id='mock-sandbox-id',
            created_by_user_id='mock-user-id',
            sandbox_spec_id='mock-sandbox-spec-id',
            status=SandboxStatus.RUNNING,
            session_api_key='mock-session-api-key',
        )
        # Patch the pieces invoked by the service
        with (
            patch(
@@ -204,6 +213,7 @@ class TestExperimentManagerIntegration:
        ):
            # --- Act: build the start request
            start_req = await service._build_start_conversation_request_for_user(
                sandbox=sandbox,
                initial_message=None,
                git_provider=None,  # Keep secrets path simple
                working_dir='/tmp/project',  # Arbitrary path
--- a/tests/unit/server/data_models/test_conversation.py
+++ b/tests/unit/server/data_models/test_conversation.py
@@ -2150,7 +2150,9 @@ async def test_delete_v1_conversation_with_sub_conversations():
        sandbox_spec_id='test-spec-id',
        status=SandboxStatus.RUNNING,
        session_api_key='test-api-key',
-        exposed_urls=[ExposedUrl(name=AGENT_SERVER, url='http://agent:8000')],
+        exposed_urls=[
            ExposedUrl(name=AGENT_SERVER, url='http://agent:8000', port=8000)
        ],
    )
    mock_sandbox_service.get_sandbox = AsyncMock(return_value=mock_sandbox)
@@ -2269,7 +2271,9 @@ async def test_delete_v1_conversation_with_no_sub_conversations():
        sandbox_spec_id='test-spec-id',
        status=SandboxStatus.RUNNING,
        session_api_key='test-api-key',
-        exposed_urls=[ExposedUrl(name=AGENT_SERVER, url='http://agent:8000')],
+        exposed_urls=[
            ExposedUrl(name=AGENT_SERVER, url='http://agent:8000', port=8000)
        ],
    )
    mock_sandbox_service.get_sandbox = AsyncMock(return_value=mock_sandbox)
@@ -2418,7 +2422,9 @@ async def test_delete_v1_conversation_sub_conversation_deletion_error():
        sandbox_spec_id='test-spec-id',
        status=SandboxStatus.RUNNING,
        session_api_key='test-api-key',
-        exposed_urls=[ExposedUrl(name=AGENT_SERVER, url='http://agent:8000')],
+        exposed_urls=[
            ExposedUrl(name=AGENT_SERVER, url='http://agent:8000', port=8000)
        ],
    )
    mock_sandbox_service.get_sandbox = AsyncMock(return_value=mock_sandbox)