Fix issue #4859: [Bug]: Regression on copy-paste text into the chat prompt input box

2026-04-29 03:00:45 -04:00 · 2024-11-09 06:34:40 +00:00
27 changed files with 283 additions and 246 deletions
--- a/docs/modules/usage/llms/llms.md
+++ b/docs/modules/usage/llms/llms.md
@@ -4,11 +4,11 @@ OpenHands can connect to any LLM supported by LiteLLM. However, it requires a po

 ## Model Recommendations

-Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some recommendations for model selection. Some analyses can be found in [this blog article comparing LLMs](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) and [this blog article with some more recent results](https://www.all-hands.dev/blog/openhands-codeact-21-an-open-state-of-the-art-software-development-agent).
+Based on a recent evaluation of language models for coding tasks (using the SWE-bench dataset), we can provide some recommendations for model selection. The full analysis can be found in [this blog article](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed).

 When choosing a model, consider both the quality of outputs and the associated costs. Here's a summary of the findings:

- Claude 3.5 Sonnet is the best by a fair amount, achieving a 53% resolve rate on SWE-Bench Verified with the default agent in OpenHands.
+- Claude 3.5 Sonnet is the best by a fair amount, achieving a 27% resolve rate with the default agent in OpenHands.
 - GPT-4o lags behind, and o1-mini actually performed somewhat worse than GPT-4o. We went in and analyzed the results a little, and briefly it seemed like o1 was sometimes "overthinking" things, performing extra environment configuration tasks when it could just go ahead and finish the task.
 - Finally, the strongest open models were Llama 3.1 405 B and deepseek-v2.5, and they performed reasonably, even besting some of the closed models.

--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -35,8 +35,7 @@ def codeact_user_response_eda(state: State) -> str:

    # retrieve the latest model message from history
    if state.history:
-        last_agent_message = state.get_last_agent_message()
-        model_guess = last_agent_message.content if last_agent_message else ''
+        model_guess = state.get_last_agent_message()

    assert game is not None, 'Game is not initialized.'
    msg = game.generate_user_response(model_guess)
@@ -141,8 +140,7 @@ def process_instance(
    if state is None:
        raise ValueError('State should not be None.')

-    last_agent_message = state.get_last_agent_message()
-    final_message = last_agent_message.content if last_agent_message else ''
+    final_message = state.get_last_agent_message()

    logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
    test_result = game.reward()
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -102,8 +102,7 @@ def process_instance(
        raise ValueError('State should not be None.')

    # retrieve the last message from the agent
-    last_agent_message = state.get_last_agent_message()
-    model_answer_raw = last_agent_message.content if last_agent_message else ''
+    model_answer_raw = state.get_last_agent_message()

    # attempt to parse model_answer
    ast_eval_fn = instance['ast_eval']
--- a/evaluation/swe_bench/eval_infer.py
+++ b/evaluation/swe_bench/eval_infer.py
@@ -83,7 +83,6 @@ def get_config(instance: pd.Series) -> AppConfig:
            timeout=1800,
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
-            remote_runtime_init_timeout=1800,
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -146,7 +146,6 @@ def get_config(
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
            keep_remote_runtime_alive=False,
-            remote_runtime_init_timeout=1800,
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -127,8 +127,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
        raise ValueError('State should not be None.')

    # retrieve the last message from the agent
-    last_agent_message = state.get_last_agent_message()
-    model_answer_raw = last_agent_message.content if last_agent_message else ''
+    model_answer_raw = state.get_last_agent_message()

    # attempt to parse model_answer
    correct = eval_answer(str(model_answer_raw), str(answer))
--- a/frontend/server.log
+++ b/frontend/server.log
@@ -1,11 +0,0 @@
-
-> openhands-frontend@0.13.0 dev
-> npm run make-i18n && cross-env VITE_MOCK_API=false remix vite:dev
-
-
-> openhands-frontend@0.13.0 make-i18n
-> node scripts/make-i18n-translations.cjs
-
-  ➜  Local:   http://localhost:3001/
-  ➜  Network: use --host to expose
-  ➜  press h + enter to show help
--- a/frontend/src/components/chat-input.tsx
+++ b/frontend/src/components/chat-input.tsx
@@ -41,17 +41,19 @@ export function ChatInput({

  const handlePaste = (event: React.ClipboardEvent<HTMLTextAreaElement>) => {
    // Only handle paste if we have an image paste handler and there are files
-    if (onImagePaste && event.clipboardData.files.length > 0) {
-      const files = Array.from(event.clipboardData.files).filter((file) =>
-        file.type.startsWith("image/"),
-      );
-      // Only prevent default if we found image files to handle
-      if (files.length > 0) {
-        event.preventDefault();
-        onImagePaste(files);
-      }
+    if (!onImagePaste || event.clipboardData.files.length === 0) {
+      // For text paste, let the default behavior handle it
+      return;
+    }
+
+    const files = Array.from(event.clipboardData.files).filter((file) =>
+      file.type.startsWith("image/"),
+    );
+    // Only prevent default if we found image files to handle
+    if (files.length > 0) {
+      event.preventDefault();
+      onImagePaste(files);
    }
-    // For text paste, let the default behavior handle it
  };

  const handleDragOver = (event: React.DragEvent<HTMLTextAreaElement>) => {
--- a/frontend/src/components/project-menu/ProjectMenuCard.tsx
+++ b/frontend/src/components/project-menu/ProjectMenuCard.tsx
@@ -43,7 +43,10 @@ export function ProjectMenuCard({
    posthog.capture("push_to_github_button_clicked");
    const rawEvent = {
      content: `
-Please push the changes to GitHub and open a pull request.
+Let's push the code to GitHub.
+If we're currently on the openhands-workspace branch, please create a new branch with a descriptive name.
+Commit any changes and push them to the remote repository.
+Finally, open up a pull request using the GitHub API and the token in the GITHUB_TOKEN environment variable, then show me the URL of the pull request.
 `,
      imageUrls: [],
      timestamp: new Date().toISOString(),
--- a/frontend/src/hooks/useTerminal.ts
+++ b/frontend/src/hooks/useTerminal.ts
@@ -101,10 +101,6 @@ export const useTerminal = (
          if (commandBuffer.length > 0) {
            commandBuffer = handleBackspace(commandBuffer);
          }
-        } else if (domEvent.key === "Tab") {
-          // Swallow tab key and convert to space
-          commandBuffer += " ";
-          terminal.current?.write(" ");
        } else {
          // Ignore paste event
          if (key.charCodeAt(0) === 22) {
--- a/frontend/src/utils/verified-models.ts
+++ b/frontend/src/utils/verified-models.ts
@@ -1,6 +1,10 @@
 // Here are the list of verified models and providers that we know work well with OpenHands.
 export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic"];
-export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20241022"];
+export const VERIFIED_MODELS = [
+  "gpt-4o",
+  "claude-3-5-sonnet-20240620",
+  "claude-3-5-sonnet-20241022",
+];

 // LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
 // (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
@@ -19,8 +23,11 @@ export const VERIFIED_OPENAI_MODELS = [
 export const VERIFIED_ANTHROPIC_MODELS = [
  "claude-2",
  "claude-2.1",
+  "claude-3-5-sonnet-20241022",
  "claude-3-5-sonnet-20240620",
  "claude-3-haiku-20240307",
  "claude-3-opus-20240229",
  "claude-3-sonnet-20240229",
+  "claude-instant-1",
+  "claude-instant-1.2",
 ];
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -39,6 +39,7 @@ from openhands.runtime.plugins import (
    JupyterRequirement,
    PluginRequirement,
 )
+from openhands.utils.microagent import MicroAgent
 from openhands.utils.prompt import PromptManager


@@ -85,6 +86,16 @@ class CodeActAgent(Agent):
        super().__init__(llm, config)
        self.reset()

+        self.micro_agent = (
+            MicroAgent(
+                os.path.join(
+                    os.path.dirname(__file__), 'micro', f'{config.micro_agent_name}.md'
+                )
+            )
+            if config.micro_agent_name
+            else None
+        )
+
        self.function_calling_active = self.config.function_calling
        if self.function_calling_active and not self.llm.is_function_calling_active():
            logger.warning(
@@ -94,6 +105,7 @@ class CodeActAgent(Agent):
            self.function_calling_active = False

        if self.function_calling_active:
+            # Function calling mode
            self.tools = codeact_function_calling.get_tools(
                codeact_enable_browsing=self.config.codeact_enable_browsing,
                codeact_enable_jupyter=self.config.codeact_enable_jupyter,
@@ -102,17 +114,18 @@ class CodeActAgent(Agent):
            logger.debug(
                f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
            )
-            self.prompt_manager = PromptManager(
-                microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
-                prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'),
-            )
+            self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
+            self.initial_user_message = None
        else:
+            # Non-function-calling mode
            self.action_parser = CodeActResponseParser()
            self.prompt_manager = PromptManager(
-                microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
-                prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'default'),
+                prompt_dir=os.path.join(os.path.dirname(__file__)),
                agent_skills_docs=AgentSkillsRequirement.documentation,
+                micro_agent=self.micro_agent,
            )
+            self.system_prompt = self.prompt_manager.system_message
+            self.initial_user_message = self.prompt_manager.initial_user_message

        self.pending_actions: deque[Action] = deque()

@@ -324,8 +337,8 @@ class CodeActAgent(Agent):
            return self.pending_actions.popleft()

        # if we're done, go back
-        latest_user_message = state.get_last_user_message()
-        if latest_user_message and latest_user_message.content.strip() == '/exit':
+        last_user_message = state.get_last_user_message()
+        if last_user_message and last_user_message.strip() == '/exit':
            return AgentFinishAction()

        # prepare what we want to send to the LLM
@@ -390,19 +403,17 @@ class CodeActAgent(Agent):
                role='system',
                content=[
                    TextContent(
-                        text=self.prompt_manager.get_system_message(),
-                        cache_prompt=self.llm.is_caching_prompt_active(),
+                        text=self.system_prompt,
+                        cache_prompt=self.llm.is_caching_prompt_active(),  # Cache system prompt
                    )
                ],
            )
        ]
-        example_message = self.prompt_manager.get_example_user_message()
-        if example_message:
+        if self.initial_user_message:
            messages.append(
                Message(
                    role='user',
-                    content=[TextContent(text=example_message)],
-                    cache_prompt=self.llm.is_caching_prompt_active(),
+                    content=[TextContent(text=self.initial_user_message)],
                )
            )

@@ -451,9 +462,8 @@ class CodeActAgent(Agent):
                pending_tool_call_action_messages.pop(response_id)

            for message in messages_to_add:
+                # add regular message
                if message:
-                    if message.role == 'user':
-                        self.prompt_manager.enhance_message(message)
                    # handle error if the message is the SAME role as the previous message
                    # litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
                    # there shouldn't be two consecutive messages from the same role
@@ -483,6 +493,23 @@ class CodeActAgent(Agent):
                        break

        if not self.function_calling_active:
-            self.prompt_manager.add_turns_left_reminder(messages, state)
+            # The latest user message is important:
+            # we want to remind the agent of the environment constraints
+            latest_user_message = next(
+                islice(
+                    (
+                        m
+                        for m in reversed(messages)
+                        if m.role == 'user'
+                        and any(isinstance(c, TextContent) for c in m.content)
+                    ),
+                    1,
+                ),
+                None,
+            )
+            # do not add this for function calling
+            if latest_user_message:
+                reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
+                latest_user_message.content.append(TextContent(text=reminder_text))

        return messages
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -25,6 +25,14 @@ from openhands.events.action import (
 )
 from openhands.events.tool import ToolCallMetadata

+SYSTEM_PROMPT = """You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
+<IMPORTANT>
+* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
+* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
+* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
+</IMPORTANT>
+"""
+
 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
 * Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
 * Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
--- a/openhands/agenthub/codeact_agent/micro/flarglebargle.md
+++ b/openhands/agenthub/codeact_agent/micro/flarglebargle.md
@@ -1,9 +0,0 @@
---
-name: flarglebargle
-agent: CodeActAgent
-triggers:
- flarglebargle
---
-
-IMPORTANT! The user has said the magic word "flarglebargle". You must
-only respond with a message telling them how smart they are
--- a/openhands/agenthub/codeact_agent/micro/github.md
+++ b/openhands/agenthub/codeact_agent/micro/github.md
@@ -1,31 +1,69 @@
 ---
 name: github
 agent: CodeActAgent
-triggers:
- github
- git
+require_env_var:
+    SANDBOX_ENV_GITHUB_TOKEN: "Create a GitHub Personal Access Token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and set it as SANDBOX_GITHUB_TOKEN in your environment variables."
 ---

-You have access to an environment variable, `GITHUB_TOKEN`, which allows you to interact with
-the GitHub API.
+# How to Interact with GitHub

-You can use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API.
-ALWAYS use the GitHub API for operations instead of a web browser.
+## Environment Variable Available

-Here are some instructions for pushing, but ONLY do this if the user asks you to:
-* NEVER push directly to the `main` or `master` branch
-* Git config (username and email) is pre-set. Do not modify.
-* You may already be on a branch called `openhands-workspace`. Create a new branch with a better name before pushing.
-* Use the GitHub API to create a pull request, if you haven't already
-* Use the main branch as the base branch, unless the user requests otherwise
-* After opening or updating a pull request, send the user a short message with a link to the pull request.
-* Do all of the above in as few steps as possible. E.g. you could open a PR with one step by running the following bash commands:
-```bash
-git checkout -b create-widget
-git add .
-git commit -m "Create widget"
-git push origin create-widget
-curl -X POST "https://api.github.com/repos/CodeActOrg/openhands/pulls" \
-    -H "Authorization: Bearer $GITHUB_TOKEN" \
-    -d '{"title":"Create widget","head":"create-widget","base":"openhands-workspace"}'
+- `GITHUB_TOKEN`: A read-only token for GitHub.
+
+## Using GitHub's RESTful API
+
+Use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API.
+
+Here's a template for API calls:
+
+```sh
+curl -H "Authorization: token $GITHUB_TOKEN" \
+    "https://api.github.com/{endpoint}"
 ```
+
+First replace `{endpoint}` with the specific API path. Common operations:
+
+1. View an issue or pull request:
+   - Issues: `/repos/{owner}/{repo}/issues/{issue_number}`
+   - Pull requests: `/repos/{owner}/{repo}/pulls/{pull_request_number}`
+
+2. List repository issues or pull requests:
+   - Issues: `/repos/{owner}/{repo}/issues`
+   - Pull requests: `/repos/{owner}/{repo}/pulls`
+
+3. Search issues or pull requests:
+   - `/search/issues?q=repo:{owner}/{repo}+is:{type}+{search_term}+state:{state}`
+   - Replace `{type}` with `issue` or `pr`
+
+4. List repository branches:
+   `/repos/{owner}/{repo}/branches`
+
+5. Get commit details:
+   `/repos/{owner}/{repo}/commits/{commit_sha}`
+
+6. Get repository details:
+   `/repos/{owner}/{repo}`
+
+7. Get user information:
+   `/user`
+
+8. Search repositories:
+   `/search/repositories?q={query}`
+
+9. Get rate limit status:
+   `/rate_limit`
+
+Replace `{owner}`, `{repo}`, `{commit_sha}`, `{issue_number}`, `{pull_request_number}`,
+`{search_term}`, `{state}`, and `{query}` with appropriate values.
+
+## Important Notes
+
+1. Always use the GitHub API for operations instead of a web browser.
+2. The `GITHUB_TOKEN` is read-only. Avoid operations that require write access.
+3. Git config (username and email) is pre-set. Do not modify.
+4. Edit and test code locally. Never push directly to remote.
+5. Verify correct branch before committing.
+6. Commit changes frequently.
+7. If the issue or task is ambiguous or lacks sufficient detail, always request clarification from the user before proceeding.
+8. You should avoid using command line tools like `sed` for file editing.
--- a/openhands/agenthub/codeact_agent/prompts/tools/system_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/tools/system_prompt.j2
@@ -1,7 +0,0 @@
-You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
-<IMPORTANT>
-* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
-* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
-* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
-</IMPORTANT>
-
--- a/openhands/agenthub/codeact_agent/prompts/tools/user_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/tools/user_prompt.j2
--- a/openhands/agenthub/codeact_agent/prompts/default/system_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/default/system_prompt.j2
--- a/openhands/agenthub/codeact_agent/prompts/default/user_prompt.j2
+++ b/openhands/agenthub/codeact_agent/prompts/default/user_prompt.j2
@@ -215,5 +215,12 @@ The server is running on port 5000 with PID 126. You can access the list of numb
 {% endset %}
 Here is an example of how you can interact with the environment for task solving:
 {{ DEFAULT_EXAMPLE }}
+{% if micro_agent %}
+--- BEGIN OF GUIDELINE ---
+The following information may assist you in completing your task:
+
+{{ micro_agent }}
+--- END OF GUIDELINE ---
+{% endif %}

 NOW, LET'S START!
--- a/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
+++ b/openhands/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -155,7 +155,7 @@ class CodeActSWEAgent(Agent):
        """
        # if we're done, go back
        last_user_message = state.get_last_user_message()
-        if last_user_message and last_user_message.content.strip() == '/exit':
+        if last_user_message and last_user_message.strip() == '/exit':
            return AgentFinishAction()

        # prepare what we want to send to the LLM
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -156,14 +156,14 @@ class State:

        return last_user_message, last_user_message_image_urls

-    def get_last_agent_message(self) -> MessageAction | None:
+    def get_last_agent_message(self) -> str | None:
        for event in reversed(self.history):
            if isinstance(event, MessageAction) and event.source == EventSource.AGENT:
-                return event
+                return event.content
        return None

-    def get_last_user_message(self) -> MessageAction | None:
+    def get_last_user_message(self) -> str | None:
        for event in reversed(self.history):
            if isinstance(event, MessageAction) and event.source == EventSource.USER:
-                return event
+                return event.content
        return None
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -14,8 +14,7 @@ class SandboxConfig:
        base_container_image: The base container image from which to build the runtime image.
        runtime_container_image: The runtime container image to use.
        user_id: The user ID for the sandbox.
-        timeout: The timeout for the default sandbox action execution.
-        remote_runtime_init_timeout: The timeout for the remote runtime to start.
+        timeout: The timeout for the sandbox.
        enable_auto_lint: Whether to enable auto-lint.
        use_host_network: Whether to use the host network.
        initialize_plugins: Whether to initialize plugins.
@@ -42,7 +41,6 @@ class SandboxConfig:
    runtime_container_image: str | None = None
    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
    timeout: int = 120
-    remote_runtime_init_timeout: int = 180
    enable_auto_lint: bool = (
        False  # once enabled, OpenHands would lint files after editing
    )
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -1,7 +1,7 @@
 import os
+from pathlib import Path
 import tempfile
 import threading
-from pathlib import Path
 from typing import Callable, Optional
 from zipfile import ZipFile

@@ -260,19 +260,13 @@ class RemoteRuntime(Runtime):
                {'X-Session-API-Key': start_response['session_api_key']}
            )

+    @tenacity.retry(
+        stop=tenacity.stop_after_delay(180) | stop_if_should_exit(),
+        reraise=True,
+        retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
+        wait=tenacity.wait_fixed(2),
+    )
    def _wait_until_alive(self):
-        retry_decorator = tenacity.retry(
-            stop=tenacity.stop_after_delay(
-                self.config.sandbox.remote_runtime_init_timeout
-            )
-            | stop_if_should_exit(),
-            reraise=True,
-            retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
-            wait=tenacity.wait_fixed(2),
-        )
-        return retry_decorator(self._wait_until_alive_impl)()
-
-    def _wait_until_alive_impl(self):
        self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
        runtime_info_response = self._send_request(
            'GET',
--- a/openhands/utils/microagent.py
+++ b/openhands/utils/microagent.py
@@ -3,11 +3,15 @@ import os
 import frontmatter
 import pydantic

+from openhands.controller.agent import Agent
+from openhands.core.exceptions import MicroAgentValidationError
+from openhands.core.logger import openhands_logger as logger
+

 class MicroAgentMetadata(pydantic.BaseModel):
    name: str
    agent: str
-    triggers: list[str] = []
+    require_env_var: dict[str, str]


 class MicroAgent:
@@ -19,30 +23,22 @@ class MicroAgent:
            self._loaded = frontmatter.load(file)
            self._content = self._loaded.content
            self._metadata = MicroAgentMetadata(**self._loaded.metadata)
-
-    def get_trigger(self, message: str) -> str | None:
-        message = message.lower()
-        for trigger in self.triggers:
-            if trigger.lower() in message:
-                return trigger
-        return None
+        self._validate_micro_agent()

    @property
    def content(self) -> str:
        return self._content

-    @property
-    def metadata(self) -> MicroAgentMetadata:
-        return self._metadata
-
-    @property
-    def name(self) -> str:
-        return self._metadata.name
-
-    @property
-    def triggers(self) -> list[str]:
-        return self._metadata.triggers
-
-    @property
-    def agent(self) -> str:
-        return self._metadata.agent
+    def _validate_micro_agent(self):
+        logger.debug(
+            f'Loading and validating micro agent [{self._metadata.name}] based on [{self._metadata.agent}]'
+        )
+        # Make sure the agent is registered
+        agent_cls = Agent.get_cls(self._metadata.agent)
+        assert agent_cls is not None
+        # Make sure the environment variables are set
+        for env_var, instruction in self._metadata.require_env_var.items():
+            if env_var not in os.environ:
+                raise MicroAgentValidationError(
+                    f'Environment variable [{env_var}] is required by micro agent [{self._metadata.name}] but not set. {instruction}'
+                )
--- a/openhands/utils/prompt.py
+++ b/openhands/utils/prompt.py
@@ -1,10 +1,7 @@
 import os
-from itertools import islice

 from jinja2 import Template

-from openhands.controller.state.state import State
-from openhands.core.message import Message, TextContent
 from openhands.utils.microagent import MicroAgent


@@ -19,31 +16,21 @@ class PromptManager:
    Attributes:
        prompt_dir (str): Directory containing prompt templates.
        agent_skills_docs (str): Documentation of agent skills.
+        micro_agent (MicroAgent | None): Micro-agent, if specified.
    """

    def __init__(
        self,
        prompt_dir: str,
-        microagent_dir: str = '',
-        agent_skills_docs: str = '',
+        agent_skills_docs: str,
+        micro_agent: MicroAgent | None = None,
    ):
        self.prompt_dir: str = prompt_dir
        self.agent_skills_docs: str = agent_skills_docs

        self.system_template: Template = self._load_template('system_prompt')
        self.user_template: Template = self._load_template('user_prompt')
-        self.microagents: dict = {}
-
-        microagent_files = []
-        if microagent_dir:
-            microagent_files = [
-                os.path.join(microagent_dir, f)
-                for f in os.listdir(microagent_dir)
-                if f.endswith('.md')
-            ]
-        for microagent_file in microagent_files:
-            microagent = MicroAgent(microagent_file)
-            self.microagents[microagent.name] = microagent
+        self.micro_agent: MicroAgent | None = micro_agent

    def _load_template(self, template_name: str) -> Template:
        template_path = os.path.join(self.prompt_dir, f'{template_name}.j2')
@@ -52,13 +39,15 @@ class PromptManager:
        with open(template_path, 'r') as file:
            return Template(file.read())

-    def get_system_message(self) -> str:
+    @property
+    def system_message(self) -> str:
        rendered = self.system_template.render(
            agent_skills_docs=self.agent_skills_docs,
        ).strip()
        return rendered

-    def get_example_user_message(self) -> str:
+    @property
+    def initial_user_message(self) -> str:
        """This is the initial user message provided to the agent
        before *actual* user instructions are provided.

@@ -68,39 +57,7 @@ class PromptManager:
        These additional context will convert the current generic agent
        into a more specialized agent that is tailored to the user's task.
        """
-        return self.user_template.render().strip()
-
-    def enhance_message(self, message: Message) -> None:
-        """Enhance the user message with additional context.
-
-        This method is used to enhance the user message with additional context
-        about the user's task. The additional context will convert the current
-        generic agent into a more specialized agent that is tailored to the user's task.
-        """
-        if not message.content:
-            return
-        message_content = message.content[0].text
-        for microagent in self.microagents.values():
-            trigger = microagent.get_trigger(message_content)
-            if trigger:
-                micro_text = f'<extra_info>\nThe following information has been included based on a keyword match for "{trigger}". It may or may not be relevant to the user\'s request.'
-                micro_text += '\n\n' + microagent.content
-                micro_text += '\n</extra_info>'
-                message.content.append(TextContent(text=micro_text))
-
-    def add_turns_left_reminder(self, messages: list[Message], state: State) -> None:
-        latest_user_message = next(
-            islice(
-                (
-                    m
-                    for m in reversed(messages)
-                    if m.role == 'user'
-                    and any(isinstance(c, TextContent) for c in m.content)
-                ),
-                1,
-            ),
-            None,
+        rendered = self.user_template.render(
+            micro_agent=self.micro_agent.content if self.micro_agent else None
        )
-        if latest_user_message:
-            reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
-            latest_user_message.content.append(TextContent(text=reminder_text))
+        return rendered.strip()
--- a/tests/unit/test_microagent_utils.py
+++ b/tests/unit/test_microagent_utils.py
@@ -1,8 +1,13 @@
 import os

+import pytest
 from pytest import MonkeyPatch

 import openhands.agenthub  # noqa: F401
+from openhands.core.exceptions import (
+    AgentNotRegisteredError,
+    MicroAgentValidationError,
+)
 from openhands.utils.microagent import MicroAgent

 CONTENT = (
@@ -29,3 +34,40 @@ def test_micro_agent_load(tmp_path, monkeypatch: MonkeyPatch):
    micro_agent = MicroAgent(os.path.join(tmp_path, 'dummy.md'))
    assert micro_agent is not None
    assert micro_agent.content == CONTENT.strip()
+
+
+def test_not_existing_agent(tmp_path, monkeypatch: MonkeyPatch):
+    with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
+        f.write(
+            (
+                '---\n'
+                'name: dummy\n'
+                'agent: NotExistingAgent\n'
+                'require_env_var:\n'
+                '  SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
+                '---\n' + CONTENT
+            )
+        )
+    monkeypatch.setenv('SANDBOX_OPENHANDS_TEST_ENV_VAR', 'dummy_value')
+
+    with pytest.raises(AgentNotRegisteredError):
+        MicroAgent(os.path.join(tmp_path, 'dummy.md'))
+
+
+def test_not_existing_env_var(tmp_path):
+    with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
+        f.write(
+            (
+                '---\n'
+                'name: dummy\n'
+                'agent: CodeActAgent\n'
+                'require_env_var:\n'
+                '  SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
+                '---\n' + CONTENT
+            )
+        )
+
+    with pytest.raises(MicroAgentValidationError) as excinfo:
+        MicroAgent(os.path.join(tmp_path, 'dummy.md'))
+
+    assert 'Set this environment variable for testing purposes' in str(excinfo.value)
--- a/tests/unit/test_prompt_manager.py
+++ b/tests/unit/test_prompt_manager.py
@@ -1,9 +1,9 @@
 import os
 import shutil
+from unittest.mock import Mock

 import pytest

-from openhands.core.message import Message, TextContent
 from openhands.utils.microagent import MicroAgent
 from openhands.utils.prompt import PromptManager

@@ -11,9 +11,7 @@ from openhands.utils.prompt import PromptManager
@pytest.fixture
 def prompt_dir(tmp_path):
    # Copy contents from "openhands/agenthub/codeact_agent" to the temp directory
-    shutil.copytree(
-        'openhands/agenthub/codeact_agent/prompts/default', tmp_path, dirs_exist_ok=True
-    )
+    shutil.copytree('openhands/agenthub/codeact_agent', tmp_path, dirs_exist_ok=True)

    # Return the temporary directory path
    return tmp_path
@@ -27,79 +25,78 @@ def agent_skills_docs():
    return SAMPLE_AGENT_SKILLS_DOCS


-def test_prompt_manager_without_microagent(prompt_dir, agent_skills_docs):
-    manager = PromptManager(
-        prompt_dir, microagent_dir='', agent_skills_docs=agent_skills_docs
-    )
+def test_prompt_manager_without_micro_agent(prompt_dir, agent_skills_docs):
+    manager = PromptManager(prompt_dir, agent_skills_docs)

    assert manager.prompt_dir == prompt_dir
    assert manager.agent_skills_docs == agent_skills_docs
-    assert len(manager.microagents) == 0
+    assert manager.micro_agent is None

-    assert isinstance(manager.get_system_message(), str)
+    assert isinstance(manager.system_message, str)
    assert (
        "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions."
-        in manager.get_system_message()
+        in manager.system_message
    )
-    assert SAMPLE_AGENT_SKILLS_DOCS in manager.get_system_message()
-    assert isinstance(manager.get_example_user_message(), str)
-    assert '--- BEGIN OF GUIDELINE ---' not in manager.get_example_user_message()
-    assert '--- END OF GUIDELINE ---' not in manager.get_example_user_message()
-    assert "NOW, LET'S START!" in manager.get_example_user_message()
-    assert 'microagent' not in manager.get_example_user_message()
+    assert SAMPLE_AGENT_SKILLS_DOCS in manager.system_message
+    assert isinstance(manager.initial_user_message, str)
+    assert '--- BEGIN OF GUIDELINE ---' not in manager.initial_user_message
+    assert '--- END OF GUIDELINE ---' not in manager.initial_user_message
+    assert "NOW, LET'S START!" in manager.initial_user_message
+    assert 'micro_agent' not in manager.initial_user_message


-def test_prompt_manager_with_microagent(prompt_dir, agent_skills_docs):
-    microagent_name = 'test_microagent'
-    microagent_content = """
---
-name: flarglebargle
-agent: CodeActAgent
-triggers:
- flarglebargle
---
-
-IMPORTANT! The user has said the magic word "flarglebargle". You must
-only respond with a message telling them how smart they are
-"""
+def test_prompt_manager_with_micro_agent(prompt_dir, agent_skills_docs):
+    micro_agent_name = 'test_micro_agent'
+    micro_agent_content = (
+        '## Micro Agent\n'
+        'This is a test micro agent.\n'
+        'It is used to test the prompt manager.\n'
+    )

    # Create a temporary micro agent file
    os.makedirs(os.path.join(prompt_dir, 'micro'), exist_ok=True)
-    with open(os.path.join(prompt_dir, 'micro', f'{microagent_name}.md'), 'w') as f:
-        f.write(microagent_content)
+    with open(os.path.join(prompt_dir, 'micro', f'{micro_agent_name}.md'), 'w') as f:
+        f.write(micro_agent_content)
+
+    # Mock MicroAgent
+    mock_micro_agent = Mock(spec=MicroAgent)
+    mock_micro_agent.content = micro_agent_content

    manager = PromptManager(
        prompt_dir=prompt_dir,
-        microagent_dir=os.path.join(prompt_dir, 'micro'),
        agent_skills_docs=agent_skills_docs,
+        micro_agent=mock_micro_agent,
    )

    assert manager.prompt_dir == prompt_dir
    assert manager.agent_skills_docs == agent_skills_docs
-    assert len(manager.microagents) == 1
+    assert manager.micro_agent == mock_micro_agent

-    assert isinstance(manager.get_system_message(), str)
+    assert isinstance(manager.system_message, str)
    assert (
        "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions."
-        in manager.get_system_message()
+        in manager.system_message
    )
-    assert SAMPLE_AGENT_SKILLS_DOCS in manager.get_system_message()
+    assert SAMPLE_AGENT_SKILLS_DOCS in manager.system_message

-    assert isinstance(manager.get_example_user_message(), str)
+    assert isinstance(manager.initial_user_message, str)
+    assert (
+        '--- BEGIN OF GUIDELINE ---\n'
+        + 'The following information may assist you in completing your task:\n\n'
+        + micro_agent_content
+        + '\n'
+        + '--- END OF GUIDELINE ---\n'
+        + "\n\nNOW, LET'S START!"
+    ) in manager.initial_user_message
+    assert micro_agent_content in manager.initial_user_message

-    message = Message(
-        role='user',
-        content=[TextContent(text='Hello, flarglebargle!')],
-    )
-    manager.enhance_message(message)
-    assert 'magic word' in message.content[1].text
-
-    os.remove(os.path.join(prompt_dir, 'micro', f'{microagent_name}.md'))
+    # Clean up the temporary file
+    os.remove(os.path.join(prompt_dir, 'micro', f'{micro_agent_name}.md'))


 def test_prompt_manager_file_not_found(prompt_dir, agent_skills_docs):
    with pytest.raises(FileNotFoundError):
-        MicroAgent(os.path.join(prompt_dir, 'micro', 'non_existent_microagent.md'))
+        MicroAgent(os.path.join(prompt_dir, 'micro', 'non_existent_micro_agent.md'))


 def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs):
@@ -107,14 +104,12 @@ def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs):
    with open(os.path.join(prompt_dir, 'system_prompt.j2'), 'w') as f:
        f.write('System prompt: {{ agent_skills_docs }}')
    with open(os.path.join(prompt_dir, 'user_prompt.j2'), 'w') as f:
-        f.write('User prompt: foo')
+        f.write('User prompt: {{ micro_agent }}')

-    manager = PromptManager(
-        prompt_dir, microagent_dir='', agent_skills_docs=agent_skills_docs
-    )
+    manager = PromptManager(prompt_dir, agent_skills_docs)

-    assert manager.get_system_message() == f'System prompt: {agent_skills_docs}'
-    assert manager.get_example_user_message() == 'User prompt: foo'
+    assert manager.system_message == f'System prompt: {agent_skills_docs}'
+    assert manager.initial_user_message == 'User prompt: None'

    # Clean up temporary files
    os.remove(os.path.join(prompt_dir, 'system_prompt.j2'))