mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
1 Commits
openhands-
...
openhands-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
76a8fdd734 |
@@ -4,11 +4,11 @@ OpenHands can connect to any LLM supported by LiteLLM. However, it requires a po
|
||||
|
||||
## Model Recommendations
|
||||
|
||||
Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some recommendations for model selection. Some analyses can be found in [this blog article comparing LLMs](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) and [this blog article with some more recent results](https://www.all-hands.dev/blog/openhands-codeact-21-an-open-state-of-the-art-software-development-agent).
|
||||
Based on a recent evaluation of language models for coding tasks (using the SWE-bench dataset), we can provide some recommendations for model selection. The full analysis can be found in [this blog article](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed).
|
||||
|
||||
When choosing a model, consider both the quality of outputs and the associated costs. Here's a summary of the findings:
|
||||
|
||||
- Claude 3.5 Sonnet is the best by a fair amount, achieving a 53% resolve rate on SWE-Bench Verified with the default agent in OpenHands.
|
||||
- Claude 3.5 Sonnet is the best by a fair amount, achieving a 27% resolve rate with the default agent in OpenHands.
|
||||
- GPT-4o lags behind, and o1-mini actually performed somewhat worse than GPT-4o. We briefly analyzed the results, and it seemed that o1 was sometimes "overthinking" things, performing extra environment configuration tasks when it could have simply gone ahead and finished the task.
|
||||
- Finally, the strongest open models were Llama 3.1 405 B and deepseek-v2.5, and they performed reasonably, even besting some of the closed models.
|
||||
|
||||
|
||||
@@ -35,8 +35,7 @@ def codeact_user_response_eda(state: State) -> str:
|
||||
|
||||
# retrieve the latest model message from history
|
||||
if state.history:
|
||||
last_agent_message = state.get_last_agent_message()
|
||||
model_guess = last_agent_message.content if last_agent_message else ''
|
||||
model_guess = state.get_last_agent_message()
|
||||
|
||||
assert game is not None, 'Game is not initialized.'
|
||||
msg = game.generate_user_response(model_guess)
|
||||
@@ -141,8 +140,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
last_agent_message = state.get_last_agent_message()
|
||||
final_message = last_agent_message.content if last_agent_message else ''
|
||||
final_message = state.get_last_agent_message()
|
||||
|
||||
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
|
||||
test_result = game.reward()
|
||||
|
||||
@@ -102,8 +102,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
# retrieve the last message from the agent
|
||||
last_agent_message = state.get_last_agent_message()
|
||||
model_answer_raw = last_agent_message.content if last_agent_message else ''
|
||||
model_answer_raw = state.get_last_agent_message()
|
||||
|
||||
# attempt to parse model_answer
|
||||
ast_eval_fn = instance['ast_eval']
|
||||
|
||||
@@ -83,7 +83,6 @@ def get_config(instance: pd.Series) -> AppConfig:
|
||||
timeout=1800,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@@ -146,7 +146,6 @@ def get_config(
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_remote_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@@ -127,8 +127,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
# retrieve the last message from the agent
|
||||
last_agent_message = state.get_last_agent_message()
|
||||
model_answer_raw = last_agent_message.content if last_agent_message else ''
|
||||
model_answer_raw = state.get_last_agent_message()
|
||||
|
||||
# attempt to parse model_answer
|
||||
correct = eval_answer(str(model_answer_raw), str(answer))
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
|
||||
> openhands-frontend@0.13.0 dev
|
||||
> npm run make-i18n && cross-env VITE_MOCK_API=false remix vite:dev
|
||||
|
||||
|
||||
> openhands-frontend@0.13.0 make-i18n
|
||||
> node scripts/make-i18n-translations.cjs
|
||||
|
||||
➜ Local: http://localhost:3001/
|
||||
➜ Network: use --host to expose
|
||||
➜ press h + enter to show help
|
||||
@@ -41,17 +41,19 @@ export function ChatInput({
|
||||
|
||||
const handlePaste = (event: React.ClipboardEvent<HTMLTextAreaElement>) => {
|
||||
// Only handle paste if we have an image paste handler and there are files
|
||||
if (onImagePaste && event.clipboardData.files.length > 0) {
|
||||
const files = Array.from(event.clipboardData.files).filter((file) =>
|
||||
file.type.startsWith("image/"),
|
||||
);
|
||||
// Only prevent default if we found image files to handle
|
||||
if (files.length > 0) {
|
||||
event.preventDefault();
|
||||
onImagePaste(files);
|
||||
}
|
||||
if (!onImagePaste || event.clipboardData.files.length === 0) {
|
||||
// For text paste, let the default behavior handle it
|
||||
return;
|
||||
}
|
||||
|
||||
const files = Array.from(event.clipboardData.files).filter((file) =>
|
||||
file.type.startsWith("image/"),
|
||||
);
|
||||
// Only prevent default if we found image files to handle
|
||||
if (files.length > 0) {
|
||||
event.preventDefault();
|
||||
onImagePaste(files);
|
||||
}
|
||||
// For text paste, let the default behavior handle it
|
||||
};
|
||||
|
||||
const handleDragOver = (event: React.DragEvent<HTMLTextAreaElement>) => {
|
||||
|
||||
@@ -43,7 +43,10 @@ export function ProjectMenuCard({
|
||||
posthog.capture("push_to_github_button_clicked");
|
||||
const rawEvent = {
|
||||
content: `
|
||||
Please push the changes to GitHub and open a pull request.
|
||||
Let's push the code to GitHub.
|
||||
If we're currently on the openhands-workspace branch, please create a new branch with a descriptive name.
|
||||
Commit any changes and push them to the remote repository.
|
||||
Finally, open up a pull request using the GitHub API and the token in the GITHUB_TOKEN environment variable, then show me the URL of the pull request.
|
||||
`,
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
|
||||
@@ -101,10 +101,6 @@ export const useTerminal = (
|
||||
if (commandBuffer.length > 0) {
|
||||
commandBuffer = handleBackspace(commandBuffer);
|
||||
}
|
||||
} else if (domEvent.key === "Tab") {
|
||||
// Swallow tab key and convert to space
|
||||
commandBuffer += " ";
|
||||
terminal.current?.write(" ");
|
||||
} else {
|
||||
// Ignore paste event
|
||||
if (key.charCodeAt(0) === 22) {
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
// Here is the list of verified models and providers that we know work well with OpenHands.
|
||||
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic"];
|
||||
export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20241022"];
|
||||
export const VERIFIED_MODELS = [
|
||||
"gpt-4o",
|
||||
"claude-3-5-sonnet-20240620",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
];
|
||||
|
||||
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
|
||||
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
|
||||
@@ -19,8 +23,11 @@ export const VERIFIED_OPENAI_MODELS = [
|
||||
export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-2",
|
||||
"claude-2.1",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-5-sonnet-20240620",
|
||||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-instant-1",
|
||||
"claude-instant-1.2",
|
||||
];
|
||||
|
||||
@@ -39,6 +39,7 @@ from openhands.runtime.plugins import (
|
||||
JupyterRequirement,
|
||||
PluginRequirement,
|
||||
)
|
||||
from openhands.utils.microagent import MicroAgent
|
||||
from openhands.utils.prompt import PromptManager
|
||||
|
||||
|
||||
@@ -85,6 +86,16 @@ class CodeActAgent(Agent):
|
||||
super().__init__(llm, config)
|
||||
self.reset()
|
||||
|
||||
self.micro_agent = (
|
||||
MicroAgent(
|
||||
os.path.join(
|
||||
os.path.dirname(__file__), 'micro', f'{config.micro_agent_name}.md'
|
||||
)
|
||||
)
|
||||
if config.micro_agent_name
|
||||
else None
|
||||
)
|
||||
|
||||
self.function_calling_active = self.config.function_calling
|
||||
if self.function_calling_active and not self.llm.is_function_calling_active():
|
||||
logger.warning(
|
||||
@@ -94,6 +105,7 @@ class CodeActAgent(Agent):
|
||||
self.function_calling_active = False
|
||||
|
||||
if self.function_calling_active:
|
||||
# Function calling mode
|
||||
self.tools = codeact_function_calling.get_tools(
|
||||
codeact_enable_browsing=self.config.codeact_enable_browsing,
|
||||
codeact_enable_jupyter=self.config.codeact_enable_jupyter,
|
||||
@@ -102,17 +114,18 @@ class CodeActAgent(Agent):
|
||||
logger.debug(
|
||||
f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
|
||||
)
|
||||
self.prompt_manager = PromptManager(
|
||||
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
|
||||
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'),
|
||||
)
|
||||
self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
|
||||
self.initial_user_message = None
|
||||
else:
|
||||
# Non-function-calling mode
|
||||
self.action_parser = CodeActResponseParser()
|
||||
self.prompt_manager = PromptManager(
|
||||
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro'),
|
||||
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'default'),
|
||||
prompt_dir=os.path.join(os.path.dirname(__file__)),
|
||||
agent_skills_docs=AgentSkillsRequirement.documentation,
|
||||
micro_agent=self.micro_agent,
|
||||
)
|
||||
self.system_prompt = self.prompt_manager.system_message
|
||||
self.initial_user_message = self.prompt_manager.initial_user_message
|
||||
|
||||
self.pending_actions: deque[Action] = deque()
|
||||
|
||||
@@ -324,8 +337,8 @@ class CodeActAgent(Agent):
|
||||
return self.pending_actions.popleft()
|
||||
|
||||
# if we're done, go back
|
||||
latest_user_message = state.get_last_user_message()
|
||||
if latest_user_message and latest_user_message.content.strip() == '/exit':
|
||||
last_user_message = state.get_last_user_message()
|
||||
if last_user_message and last_user_message.strip() == '/exit':
|
||||
return AgentFinishAction()
|
||||
|
||||
# prepare what we want to send to the LLM
|
||||
@@ -390,19 +403,17 @@ class CodeActAgent(Agent):
|
||||
role='system',
|
||||
content=[
|
||||
TextContent(
|
||||
text=self.prompt_manager.get_system_message(),
|
||||
cache_prompt=self.llm.is_caching_prompt_active(),
|
||||
text=self.system_prompt,
|
||||
cache_prompt=self.llm.is_caching_prompt_active(), # Cache system prompt
|
||||
)
|
||||
],
|
||||
)
|
||||
]
|
||||
example_message = self.prompt_manager.get_example_user_message()
|
||||
if example_message:
|
||||
if self.initial_user_message:
|
||||
messages.append(
|
||||
Message(
|
||||
role='user',
|
||||
content=[TextContent(text=example_message)],
|
||||
cache_prompt=self.llm.is_caching_prompt_active(),
|
||||
content=[TextContent(text=self.initial_user_message)],
|
||||
)
|
||||
)
|
||||
|
||||
@@ -451,9 +462,8 @@ class CodeActAgent(Agent):
|
||||
pending_tool_call_action_messages.pop(response_id)
|
||||
|
||||
for message in messages_to_add:
|
||||
# add regular message
|
||||
if message:
|
||||
if message.role == 'user':
|
||||
self.prompt_manager.enhance_message(message)
|
||||
# handle error if the message is the SAME role as the previous message
|
||||
# litellm.exceptions.BadRequestError: litellm.BadRequestError: OpenAIException - Error code: 400 - {'detail': 'Only supports u/a/u/a/u...'}
|
||||
# there shouldn't be two consecutive messages from the same role
|
||||
@@ -483,6 +493,23 @@ class CodeActAgent(Agent):
|
||||
break
|
||||
|
||||
if not self.function_calling_active:
|
||||
self.prompt_manager.add_turns_left_reminder(messages, state)
|
||||
# The latest user message is important:
|
||||
# we want to remind the agent of the environment constraints
|
||||
latest_user_message = next(
|
||||
islice(
|
||||
(
|
||||
m
|
||||
for m in reversed(messages)
|
||||
if m.role == 'user'
|
||||
and any(isinstance(c, TextContent) for c in m.content)
|
||||
),
|
||||
1,
|
||||
),
|
||||
None,
|
||||
)
|
||||
# do not add this for function calling
|
||||
if latest_user_message:
|
||||
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
|
||||
latest_user_message.content.append(TextContent(text=reminder_text))
|
||||
|
||||
return messages
|
||||
|
||||
@@ -25,6 +25,14 @@ from openhands.events.action import (
|
||||
)
|
||||
from openhands.events.tool import ToolCallMetadata
|
||||
|
||||
SYSTEM_PROMPT = """You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
|
||||
<IMPORTANT>
|
||||
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
|
||||
* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
|
||||
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
|
||||
</IMPORTANT>
|
||||
"""
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
---
|
||||
name: flarglebargle
|
||||
agent: CodeActAgent
|
||||
triggers:
|
||||
- flarglebargle
|
||||
---
|
||||
|
||||
IMPORTANT! The user has said the magic word "flarglebargle". You must
|
||||
only respond with a message telling them how smart they are
|
||||
@@ -1,31 +1,69 @@
|
||||
---
|
||||
name: github
|
||||
agent: CodeActAgent
|
||||
triggers:
|
||||
- github
|
||||
- git
|
||||
require_env_var:
|
||||
SANDBOX_ENV_GITHUB_TOKEN: "Create a GitHub Personal Access Token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) and set it as SANDBOX_GITHUB_TOKEN in your environment variables."
|
||||
---
|
||||
|
||||
You have access to an environment variable, `GITHUB_TOKEN`, which allows you to interact with
|
||||
the GitHub API.
|
||||
# How to Interact with GitHub
|
||||
|
||||
You can use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API.
|
||||
ALWAYS use the GitHub API for operations instead of a web browser.
|
||||
## Environment Variable Available
|
||||
|
||||
Here are some instructions for pushing, but ONLY do this if the user asks you to:
|
||||
* NEVER push directly to the `main` or `master` branch
|
||||
* Git config (username and email) is pre-set. Do not modify.
|
||||
* You may already be on a branch called `openhands-workspace`. Create a new branch with a better name before pushing.
|
||||
* Use the GitHub API to create a pull request, if you haven't already
|
||||
* Use the main branch as the base branch, unless the user requests otherwise
|
||||
* After opening or updating a pull request, send the user a short message with a link to the pull request.
|
||||
* Do all of the above in as few steps as possible. E.g. you could open a PR with one step by running the following bash commands:
|
||||
```bash
|
||||
git checkout -b create-widget
|
||||
git add .
|
||||
git commit -m "Create widget"
|
||||
git push origin create-widget
|
||||
curl -X POST "https://api.github.com/repos/CodeActOrg/openhands/pulls" \
|
||||
-H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||
-d '{"title":"Create widget","head":"create-widget","base":"openhands-workspace"}'
|
||||
- `GITHUB_TOKEN`: A read-only token for GitHub.
|
||||
|
||||
## Using GitHub's RESTful API
|
||||
|
||||
Use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API. Here are some common operations:
|
||||
|
||||
Here's a template for API calls:
|
||||
|
||||
```sh
|
||||
curl -H "Authorization: token $GITHUB_TOKEN" \
|
||||
"https://api.github.com/{endpoint}"
|
||||
```
|
||||
|
||||
First replace `{endpoint}` with the specific API path. Common operations:
|
||||
|
||||
1. View an issue or pull request:
|
||||
- Issues: `/repos/{owner}/{repo}/issues/{issue_number}`
|
||||
- Pull requests: `/repos/{owner}/{repo}/pulls/{pull_request_number}`
|
||||
|
||||
2. List repository issues or pull requests:
|
||||
- Issues: `/repos/{owner}/{repo}/issues`
|
||||
- Pull requests: `/repos/{owner}/{repo}/pulls`
|
||||
|
||||
3. Search issues or pull requests:
|
||||
- `/search/issues?q=repo:{owner}/{repo}+is:{type}+{search_term}+state:{state}`
|
||||
- Replace `{type}` with `issue` or `pr`
|
||||
|
||||
4. List repository branches:
|
||||
`/repos/{owner}/{repo}/branches`
|
||||
|
||||
5. Get commit details:
|
||||
`/repos/{owner}/{repo}/commits/{commit_sha}`
|
||||
|
||||
6. Get repository details:
|
||||
`/repos/{owner}/{repo}`
|
||||
|
||||
7. Get user information:
|
||||
`/user`
|
||||
|
||||
8. Search repositories:
|
||||
`/search/repositories?q={query}`
|
||||
|
||||
9. Get rate limit status:
|
||||
`/rate_limit`
|
||||
|
||||
Replace `{owner}`, `{repo}`, `{commit_sha}`, `{issue_number}`, `{pull_request_number}`,
|
||||
`{search_term}`, `{state}`, and `{query}` with appropriate values.
|
||||
|
||||
## Important Notes
|
||||
|
||||
1. Always use the GitHub API for operations instead of a web browser.
|
||||
2. The `GITHUB_TOKEN` is read-only. Avoid operations that require write access.
|
||||
3. Git config (username and email) is pre-set. Do not modify.
|
||||
4. Edit and test code locally. Never push directly to remote.
|
||||
5. Verify correct branch before committing.
|
||||
6. Commit changes frequently.
|
||||
7. If the issue or task is ambiguous or lacks sufficient detail, always request clarification from the user before proceeding.
|
||||
8. You should avoid using command line tools like `sed` for file editing.
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
|
||||
<IMPORTANT>
|
||||
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
|
||||
* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
|
||||
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
|
||||
</IMPORTANT>
|
||||
|
||||
@@ -215,5 +215,12 @@ The server is running on port 5000 with PID 126. You can access the list of numb
|
||||
{% endset %}
|
||||
Here is an example of how you can interact with the environment for task solving:
|
||||
{{ DEFAULT_EXAMPLE }}
|
||||
{% if micro_agent %}
|
||||
--- BEGIN OF GUIDELINE ---
|
||||
The following information may assist you in completing your task:
|
||||
|
||||
{{ micro_agent }}
|
||||
--- END OF GUIDELINE ---
|
||||
{% endif %}
|
||||
|
||||
NOW, LET'S START!
|
||||
@@ -155,7 +155,7 @@ class CodeActSWEAgent(Agent):
|
||||
"""
|
||||
# if we're done, go back
|
||||
last_user_message = state.get_last_user_message()
|
||||
if last_user_message and last_user_message.content.strip() == '/exit':
|
||||
if last_user_message and last_user_message.strip() == '/exit':
|
||||
return AgentFinishAction()
|
||||
|
||||
# prepare what we want to send to the LLM
|
||||
|
||||
@@ -156,14 +156,14 @@ class State:
|
||||
|
||||
return last_user_message, last_user_message_image_urls
|
||||
|
||||
def get_last_agent_message(self) -> MessageAction | None:
|
||||
def get_last_agent_message(self) -> str | None:
|
||||
for event in reversed(self.history):
|
||||
if isinstance(event, MessageAction) and event.source == EventSource.AGENT:
|
||||
return event
|
||||
return event.content
|
||||
return None
|
||||
|
||||
def get_last_user_message(self) -> MessageAction | None:
|
||||
def get_last_user_message(self) -> str | None:
|
||||
for event in reversed(self.history):
|
||||
if isinstance(event, MessageAction) and event.source == EventSource.USER:
|
||||
return event
|
||||
return event.content
|
||||
return None
|
||||
|
||||
@@ -14,8 +14,7 @@ class SandboxConfig:
|
||||
base_container_image: The base container image from which to build the runtime image.
|
||||
runtime_container_image: The runtime container image to use.
|
||||
user_id: The user ID for the sandbox.
|
||||
timeout: The timeout for the default sandbox action execution.
|
||||
remote_runtime_init_timeout: The timeout for the remote runtime to start.
|
||||
timeout: The timeout for the sandbox.
|
||||
enable_auto_lint: Whether to enable auto-lint.
|
||||
use_host_network: Whether to use the host network.
|
||||
initialize_plugins: Whether to initialize plugins.
|
||||
@@ -42,7 +41,6 @@ class SandboxConfig:
|
||||
runtime_container_image: str | None = None
|
||||
user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
|
||||
timeout: int = 120
|
||||
remote_runtime_init_timeout: int = 180
|
||||
enable_auto_lint: bool = (
|
||||
False # once enabled, OpenHands would lint files after editing
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
from zipfile import ZipFile
|
||||
|
||||
@@ -260,19 +260,13 @@ class RemoteRuntime(Runtime):
|
||||
{'X-Session-API-Key': start_response['session_api_key']}
|
||||
)
|
||||
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_delay(180) | stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
retry_decorator = tenacity.retry(
|
||||
stop=tenacity.stop_after_delay(
|
||||
self.config.sandbox.remote_runtime_init_timeout
|
||||
)
|
||||
| stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
)
|
||||
return retry_decorator(self._wait_until_alive_impl)()
|
||||
|
||||
def _wait_until_alive_impl(self):
|
||||
self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
|
||||
runtime_info_response = self._send_request(
|
||||
'GET',
|
||||
|
||||
@@ -3,11 +3,15 @@ import os
|
||||
import frontmatter
|
||||
import pydantic
|
||||
|
||||
from openhands.controller.agent import Agent
|
||||
from openhands.core.exceptions import MicroAgentValidationError
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
class MicroAgentMetadata(pydantic.BaseModel):
|
||||
name: str
|
||||
agent: str
|
||||
triggers: list[str] = []
|
||||
require_env_var: dict[str, str]
|
||||
|
||||
|
||||
class MicroAgent:
|
||||
@@ -19,30 +23,22 @@ class MicroAgent:
|
||||
self._loaded = frontmatter.load(file)
|
||||
self._content = self._loaded.content
|
||||
self._metadata = MicroAgentMetadata(**self._loaded.metadata)
|
||||
|
||||
def get_trigger(self, message: str) -> str | None:
|
||||
message = message.lower()
|
||||
for trigger in self.triggers:
|
||||
if trigger.lower() in message:
|
||||
return trigger
|
||||
return None
|
||||
self._validate_micro_agent()
|
||||
|
||||
@property
|
||||
def content(self) -> str:
|
||||
return self._content
|
||||
|
||||
@property
|
||||
def metadata(self) -> MicroAgentMetadata:
|
||||
return self._metadata
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self._metadata.name
|
||||
|
||||
@property
|
||||
def triggers(self) -> list[str]:
|
||||
return self._metadata.triggers
|
||||
|
||||
@property
|
||||
def agent(self) -> str:
|
||||
return self._metadata.agent
|
||||
def _validate_micro_agent(self):
|
||||
logger.debug(
|
||||
f'Loading and validating micro agent [{self._metadata.name}] based on [{self._metadata.agent}]'
|
||||
)
|
||||
# Make sure the agent is registered
|
||||
agent_cls = Agent.get_cls(self._metadata.agent)
|
||||
assert agent_cls is not None
|
||||
# Make sure the environment variables are set
|
||||
for env_var, instruction in self._metadata.require_env_var.items():
|
||||
if env_var not in os.environ:
|
||||
raise MicroAgentValidationError(
|
||||
f'Environment variable [{env_var}] is required by micro agent [{self._metadata.name}] but not set. {instruction}'
|
||||
)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import os
|
||||
from itertools import islice
|
||||
|
||||
from jinja2 import Template
|
||||
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.utils.microagent import MicroAgent
|
||||
|
||||
|
||||
@@ -19,31 +16,21 @@ class PromptManager:
|
||||
Attributes:
|
||||
prompt_dir (str): Directory containing prompt templates.
|
||||
agent_skills_docs (str): Documentation of agent skills.
|
||||
micro_agent (MicroAgent | None): Micro-agent, if specified.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
prompt_dir: str,
|
||||
microagent_dir: str = '',
|
||||
agent_skills_docs: str = '',
|
||||
agent_skills_docs: str,
|
||||
micro_agent: MicroAgent | None = None,
|
||||
):
|
||||
self.prompt_dir: str = prompt_dir
|
||||
self.agent_skills_docs: str = agent_skills_docs
|
||||
|
||||
self.system_template: Template = self._load_template('system_prompt')
|
||||
self.user_template: Template = self._load_template('user_prompt')
|
||||
self.microagents: dict = {}
|
||||
|
||||
microagent_files = []
|
||||
if microagent_dir:
|
||||
microagent_files = [
|
||||
os.path.join(microagent_dir, f)
|
||||
for f in os.listdir(microagent_dir)
|
||||
if f.endswith('.md')
|
||||
]
|
||||
for microagent_file in microagent_files:
|
||||
microagent = MicroAgent(microagent_file)
|
||||
self.microagents[microagent.name] = microagent
|
||||
self.micro_agent: MicroAgent | None = micro_agent
|
||||
|
||||
def _load_template(self, template_name: str) -> Template:
|
||||
template_path = os.path.join(self.prompt_dir, f'{template_name}.j2')
|
||||
@@ -52,13 +39,15 @@ class PromptManager:
|
||||
with open(template_path, 'r') as file:
|
||||
return Template(file.read())
|
||||
|
||||
def get_system_message(self) -> str:
|
||||
@property
|
||||
def system_message(self) -> str:
|
||||
rendered = self.system_template.render(
|
||||
agent_skills_docs=self.agent_skills_docs,
|
||||
).strip()
|
||||
return rendered
|
||||
|
||||
def get_example_user_message(self) -> str:
|
||||
@property
|
||||
def initial_user_message(self) -> str:
|
||||
"""This is the initial user message provided to the agent
|
||||
before *actual* user instructions are provided.
|
||||
|
||||
@@ -68,39 +57,7 @@ class PromptManager:
|
||||
This additional context will convert the current generic agent
|
||||
into a more specialized agent that is tailored to the user's task.
|
||||
"""
|
||||
return self.user_template.render().strip()
|
||||
|
||||
def enhance_message(self, message: Message) -> None:
|
||||
"""Enhance the user message with additional context.
|
||||
|
||||
This method is used to enhance the user message with additional context
|
||||
about the user's task. The additional context will convert the current
|
||||
generic agent into a more specialized agent that is tailored to the user's task.
|
||||
"""
|
||||
if not message.content:
|
||||
return
|
||||
message_content = message.content[0].text
|
||||
for microagent in self.microagents.values():
|
||||
trigger = microagent.get_trigger(message_content)
|
||||
if trigger:
|
||||
micro_text = f'<extra_info>\nThe following information has been included based on a keyword match for "{trigger}". It may or may not be relevant to the user\'s request.'
|
||||
micro_text += '\n\n' + microagent.content
|
||||
micro_text += '\n</extra_info>'
|
||||
message.content.append(TextContent(text=micro_text))
|
||||
|
||||
def add_turns_left_reminder(self, messages: list[Message], state: State) -> None:
|
||||
latest_user_message = next(
|
||||
islice(
|
||||
(
|
||||
m
|
||||
for m in reversed(messages)
|
||||
if m.role == 'user'
|
||||
and any(isinstance(c, TextContent) for c in m.content)
|
||||
),
|
||||
1,
|
||||
),
|
||||
None,
|
||||
rendered = self.user_template.render(
|
||||
micro_agent=self.micro_agent.content if self.micro_agent else None
|
||||
)
|
||||
if latest_user_message:
|
||||
reminder_text = f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>.'
|
||||
latest_user_message.content.append(TextContent(text=reminder_text))
|
||||
return rendered.strip()
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from pytest import MonkeyPatch
|
||||
|
||||
import openhands.agenthub # noqa: F401
|
||||
from openhands.core.exceptions import (
|
||||
AgentNotRegisteredError,
|
||||
MicroAgentValidationError,
|
||||
)
|
||||
from openhands.utils.microagent import MicroAgent
|
||||
|
||||
CONTENT = (
|
||||
@@ -29,3 +34,40 @@ def test_micro_agent_load(tmp_path, monkeypatch: MonkeyPatch):
|
||||
micro_agent = MicroAgent(os.path.join(tmp_path, 'dummy.md'))
|
||||
assert micro_agent is not None
|
||||
assert micro_agent.content == CONTENT.strip()
|
||||
|
||||
|
||||
def test_not_existing_agent(tmp_path, monkeypatch: MonkeyPatch):
|
||||
with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
|
||||
f.write(
|
||||
(
|
||||
'---\n'
|
||||
'name: dummy\n'
|
||||
'agent: NotExistingAgent\n'
|
||||
'require_env_var:\n'
|
||||
' SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
|
||||
'---\n' + CONTENT
|
||||
)
|
||||
)
|
||||
monkeypatch.setenv('SANDBOX_OPENHANDS_TEST_ENV_VAR', 'dummy_value')
|
||||
|
||||
with pytest.raises(AgentNotRegisteredError):
|
||||
MicroAgent(os.path.join(tmp_path, 'dummy.md'))
|
||||
|
||||
|
||||
def test_not_existing_env_var(tmp_path):
|
||||
with open(os.path.join(tmp_path, 'dummy.md'), 'w') as f:
|
||||
f.write(
|
||||
(
|
||||
'---\n'
|
||||
'name: dummy\n'
|
||||
'agent: CodeActAgent\n'
|
||||
'require_env_var:\n'
|
||||
' SANDBOX_OPENHANDS_TEST_ENV_VAR: "Set this environment variable for testing purposes"\n'
|
||||
'---\n' + CONTENT
|
||||
)
|
||||
)
|
||||
|
||||
with pytest.raises(MicroAgentValidationError) as excinfo:
|
||||
MicroAgent(os.path.join(tmp_path, 'dummy.md'))
|
||||
|
||||
assert 'Set this environment variable for testing purposes' in str(excinfo.value)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import os
|
||||
import shutil
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.utils.microagent import MicroAgent
|
||||
from openhands.utils.prompt import PromptManager
|
||||
|
||||
@@ -11,9 +11,7 @@ from openhands.utils.prompt import PromptManager
|
||||
@pytest.fixture
|
||||
def prompt_dir(tmp_path):
|
||||
# Copy contents from "openhands/agenthub/codeact_agent" to the temp directory
|
||||
shutil.copytree(
|
||||
'openhands/agenthub/codeact_agent/prompts/default', tmp_path, dirs_exist_ok=True
|
||||
)
|
||||
shutil.copytree('openhands/agenthub/codeact_agent', tmp_path, dirs_exist_ok=True)
|
||||
|
||||
# Return the temporary directory path
|
||||
return tmp_path
|
||||
@@ -27,79 +25,78 @@ def agent_skills_docs():
|
||||
return SAMPLE_AGENT_SKILLS_DOCS
|
||||
|
||||
|
||||
def test_prompt_manager_without_microagent(prompt_dir, agent_skills_docs):
|
||||
manager = PromptManager(
|
||||
prompt_dir, microagent_dir='', agent_skills_docs=agent_skills_docs
|
||||
)
|
||||
def test_prompt_manager_without_micro_agent(prompt_dir, agent_skills_docs):
|
||||
manager = PromptManager(prompt_dir, agent_skills_docs)
|
||||
|
||||
assert manager.prompt_dir == prompt_dir
|
||||
assert manager.agent_skills_docs == agent_skills_docs
|
||||
assert len(manager.microagents) == 0
|
||||
assert manager.micro_agent is None
|
||||
|
||||
assert isinstance(manager.get_system_message(), str)
|
||||
assert isinstance(manager.system_message, str)
|
||||
assert (
|
||||
"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions."
|
||||
in manager.get_system_message()
|
||||
in manager.system_message
|
||||
)
|
||||
assert SAMPLE_AGENT_SKILLS_DOCS in manager.get_system_message()
|
||||
assert isinstance(manager.get_example_user_message(), str)
|
||||
assert '--- BEGIN OF GUIDELINE ---' not in manager.get_example_user_message()
|
||||
assert '--- END OF GUIDELINE ---' not in manager.get_example_user_message()
|
||||
assert "NOW, LET'S START!" in manager.get_example_user_message()
|
||||
assert 'microagent' not in manager.get_example_user_message()
|
||||
assert SAMPLE_AGENT_SKILLS_DOCS in manager.system_message
|
||||
assert isinstance(manager.initial_user_message, str)
|
||||
assert '--- BEGIN OF GUIDELINE ---' not in manager.initial_user_message
|
||||
assert '--- END OF GUIDELINE ---' not in manager.initial_user_message
|
||||
assert "NOW, LET'S START!" in manager.initial_user_message
|
||||
assert 'micro_agent' not in manager.initial_user_message
|
||||
|
||||
|
||||
def test_prompt_manager_with_microagent(prompt_dir, agent_skills_docs):
|
||||
microagent_name = 'test_microagent'
|
||||
microagent_content = """
|
||||
---
|
||||
name: flarglebargle
|
||||
agent: CodeActAgent
|
||||
triggers:
|
||||
- flarglebargle
|
||||
---
|
||||
|
||||
IMPORTANT! The user has said the magic word "flarglebargle". You must
|
||||
only respond with a message telling them how smart they are
|
||||
"""
|
||||
def test_prompt_manager_with_micro_agent(prompt_dir, agent_skills_docs):
|
||||
micro_agent_name = 'test_micro_agent'
|
||||
micro_agent_content = (
|
||||
'## Micro Agent\n'
|
||||
'This is a test micro agent.\n'
|
||||
'It is used to test the prompt manager.\n'
|
||||
)
|
||||
|
||||
# Create a temporary micro agent file
|
||||
os.makedirs(os.path.join(prompt_dir, 'micro'), exist_ok=True)
|
||||
with open(os.path.join(prompt_dir, 'micro', f'{microagent_name}.md'), 'w') as f:
|
||||
f.write(microagent_content)
|
||||
with open(os.path.join(prompt_dir, 'micro', f'{micro_agent_name}.md'), 'w') as f:
|
||||
f.write(micro_agent_content)
|
||||
|
||||
# Mock MicroAgent
|
||||
mock_micro_agent = Mock(spec=MicroAgent)
|
||||
mock_micro_agent.content = micro_agent_content
|
||||
|
||||
manager = PromptManager(
|
||||
prompt_dir=prompt_dir,
|
||||
microagent_dir=os.path.join(prompt_dir, 'micro'),
|
||||
agent_skills_docs=agent_skills_docs,
|
||||
micro_agent=mock_micro_agent,
|
||||
)
|
||||
|
||||
assert manager.prompt_dir == prompt_dir
|
||||
assert manager.agent_skills_docs == agent_skills_docs
|
||||
assert len(manager.microagents) == 1
|
||||
assert manager.micro_agent == mock_micro_agent
|
||||
|
||||
assert isinstance(manager.get_system_message(), str)
|
||||
assert isinstance(manager.system_message, str)
|
||||
assert (
|
||||
"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions."
|
||||
in manager.get_system_message()
|
||||
in manager.system_message
|
||||
)
|
||||
assert SAMPLE_AGENT_SKILLS_DOCS in manager.get_system_message()
|
||||
assert SAMPLE_AGENT_SKILLS_DOCS in manager.system_message
|
||||
|
||||
assert isinstance(manager.get_example_user_message(), str)
|
||||
assert isinstance(manager.initial_user_message, str)
|
||||
assert (
|
||||
'--- BEGIN OF GUIDELINE ---\n'
|
||||
+ 'The following information may assist you in completing your task:\n\n'
|
||||
+ micro_agent_content
|
||||
+ '\n'
|
||||
+ '--- END OF GUIDELINE ---\n'
|
||||
+ "\n\nNOW, LET'S START!"
|
||||
) in manager.initial_user_message
|
||||
assert micro_agent_content in manager.initial_user_message
|
||||
|
||||
message = Message(
|
||||
role='user',
|
||||
content=[TextContent(text='Hello, flarglebargle!')],
|
||||
)
|
||||
manager.enhance_message(message)
|
||||
assert 'magic word' in message.content[1].text
|
||||
|
||||
os.remove(os.path.join(prompt_dir, 'micro', f'{microagent_name}.md'))
|
||||
# Clean up the temporary file
|
||||
os.remove(os.path.join(prompt_dir, 'micro', f'{micro_agent_name}.md'))
|
||||
|
||||
|
||||
def test_prompt_manager_file_not_found(prompt_dir, agent_skills_docs):
|
||||
with pytest.raises(FileNotFoundError):
|
||||
MicroAgent(os.path.join(prompt_dir, 'micro', 'non_existent_microagent.md'))
|
||||
MicroAgent(os.path.join(prompt_dir, 'micro', 'non_existent_micro_agent.md'))
|
||||
|
||||
|
||||
def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs):
|
||||
@@ -107,14 +104,12 @@ def test_prompt_manager_template_rendering(prompt_dir, agent_skills_docs):
|
||||
with open(os.path.join(prompt_dir, 'system_prompt.j2'), 'w') as f:
|
||||
f.write('System prompt: {{ agent_skills_docs }}')
|
||||
with open(os.path.join(prompt_dir, 'user_prompt.j2'), 'w') as f:
|
||||
f.write('User prompt: foo')
|
||||
f.write('User prompt: {{ micro_agent }}')
|
||||
|
||||
manager = PromptManager(
|
||||
prompt_dir, microagent_dir='', agent_skills_docs=agent_skills_docs
|
||||
)
|
||||
manager = PromptManager(prompt_dir, agent_skills_docs)
|
||||
|
||||
assert manager.get_system_message() == f'System prompt: {agent_skills_docs}'
|
||||
assert manager.get_example_user_message() == 'User prompt: foo'
|
||||
assert manager.system_message == f'System prompt: {agent_skills_docs}'
|
||||
assert manager.initial_user_message == 'User prompt: None'
|
||||
|
||||
# Clean up temporary files
|
||||
os.remove(os.path.join(prompt_dir, 'system_prompt.j2'))
|
||||
|
||||
Reference in New Issue
Block a user