mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0925798aee | |||
| 43d782da06 | |||
| 4876d811a1 | |||
| 0ab457f1d3 | |||
| 70e29f9b75 | |||
| 4cd1d80eea |
@@ -259,19 +259,16 @@ jobs:
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
PYTHONPATH: ""
|
||||
run: |
|
||||
cd /tmp && BASE_COMMIT=$(cd repo && git rev-parse HEAD) && \
|
||||
if [ "${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}" == "true" ]; then
|
||||
python -m openhands.resolver.send_pull_request \
|
||||
cd /tmp && python -m openhands.resolver.send_pull_request \
|
||||
--issue-number ${{ env.ISSUE_NUMBER }} \
|
||||
--pr-type draft \
|
||||
--reviewer ${{ github.actor }} \
|
||||
--base-commit "$BASE_COMMIT" | tee pr_result.txt && \
|
||||
--reviewer ${{ github.actor }} | tee pr_result.txt && \
|
||||
grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
|
||||
else
|
||||
python -m openhands.resolver.send_pull_request \
|
||||
cd /tmp && python -m openhands.resolver.send_pull_request \
|
||||
--issue-number ${{ env.ISSUE_NUMBER }} \
|
||||
--pr-type branch \
|
||||
--base-commit "$BASE_COMMIT" \
|
||||
--send-on-failure | tee branch_result.txt && \
|
||||
grep "branch created" branch_result.txt | sed 's/.*\///g; s/.expand=1//g' > branch_name.txt
|
||||
fi
|
||||
|
||||
@@ -5,14 +5,23 @@ OpenHands can connect to any LLM supported by LiteLLM. However, it requires a po
|
||||
## Model Recommendations
|
||||
|
||||
Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some
|
||||
recommendations for model selection. Our latest benchmarking results can be found in [this spreadsheet](https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0).
|
||||
recommendations for model selection. Some analyses can be found in [this blog article comparing LLMs](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) and
|
||||
[this blog article with some more recent results](https://www.all-hands.dev/blog/openhands-codeact-21-an-open-state-of-the-art-software-development-agent).
|
||||
|
||||
When choosing a model, consider both the quality of outputs and the associated costs. Here's a summary of the findings:
|
||||
|
||||
- Claude 3.5 Sonnet is the best by a fair amount, achieving a 53% resolve rate on SWE-Bench Verified with the default agent in OpenHands.
|
||||
- GPT-4o lags behind, and o1-mini actually performed somewhat worse than GPT-4o. We went in and analyzed the results a little, and briefly it seemed like o1 was sometimes "overthinking" things, performing extra environment configuration tasks when it could just go ahead and finish the task.
|
||||
- Finally, the strongest open models were Llama 3.1 405 B and deepseek-v2.5, and they performed reasonably, even besting some of the closed models.
|
||||
|
||||
Please refer to the [full article](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) for more details.
|
||||
|
||||
Based on these findings and community feedback, the following models have been verified to work reasonably well with OpenHands:
|
||||
|
||||
- anthropic/claude-3-5-sonnet-20241022 (recommended)
|
||||
- anthropic/claude-3-5-haiku-20241022
|
||||
- deepseek/deepseek-chat
|
||||
- gpt-4o
|
||||
- claude-3-5-sonnet (recommended)
|
||||
- gpt-4 / gpt-4o
|
||||
- llama-3.1-405b
|
||||
- deepseek-v2.5
|
||||
|
||||
:::warning
|
||||
OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending
|
||||
|
||||
+1
@@ -0,0 +1 @@
|
||||
{"agent_class": "CodeActAgent", "llm_config": {"model": "claude-3-5-sonnet-20241022", "api_key": null, "base_url": null, "api_version": null, "embedding_model": "local", "embedding_base_url": null, "embedding_deployment_name": null, "aws_access_key_id": null, "aws_secret_access_key": null, "aws_region_name": null, "openrouter_site_url": "https://docs.all-hands.dev/", "openrouter_app_name": "OpenHands", "num_retries": 8, "retry_multiplier": 2, "retry_min_wait": 15, "retry_max_wait": 120, "timeout": null, "max_message_chars": 30000, "temperature": 0.0, "top_p": 1.0, "custom_llm_provider": null, "max_input_tokens": null, "max_output_tokens": null, "input_cost_per_token": null, "output_cost_per_token": null, "ollama_base_url": null, "drop_params": true, "modify_params": true, "disable_vision": null, "caching_prompt": true, "log_completions": false, "log_completions_folder": "/workspace/OpenHands/logs/completions", "draft_editor": null, "custom_tokenizer": null, "native_tool_calling": null}, "max_iterations": 10, "eval_output_dir": "./dummy_eval_output_dir/dummy_dataset_descrption/CodeActAgent/claude-3-5-sonnet-20241022_maxiter_10_N_dummy_eval_note", "start_time": "2025-01-08 18:01:01", "git_commit": "007052c8aa15ea5149fff31583a3412ea7b8625a", "dataset": "dummy_dataset_descrption", "data_split": null, "details": {}, "condenser_config": {"type": "noop"}}
|
||||
+31
-12
@@ -18,8 +18,8 @@ describe("ConversationCard", () => {
|
||||
render(
|
||||
<ConversationCard
|
||||
onDelete={onDelete}
|
||||
onClick={onClick}
|
||||
onChangeTitle={onChangeTitle}
|
||||
isActive
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
@@ -38,8 +38,8 @@ describe("ConversationCard", () => {
|
||||
const { rerender } = render(
|
||||
<ConversationCard
|
||||
onDelete={onDelete}
|
||||
onClick={onClick}
|
||||
onChangeTitle={onChangeTitle}
|
||||
isActive
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
@@ -53,8 +53,8 @@ describe("ConversationCard", () => {
|
||||
rerender(
|
||||
<ConversationCard
|
||||
onDelete={onDelete}
|
||||
onClick={onClick}
|
||||
onChangeTitle={onChangeTitle}
|
||||
isActive
|
||||
title="Conversation 1"
|
||||
selectedRepository="org/selectedRepository"
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
@@ -64,13 +64,32 @@ describe("ConversationCard", () => {
|
||||
screen.getByTestId("conversation-card-selected-repository");
|
||||
});
|
||||
|
||||
it("should call onClick when the card is clicked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onDelete={onDelete}
|
||||
onClick={onClick}
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
/>,
|
||||
);
|
||||
|
||||
const card = screen.getByTestId("conversation-card");
|
||||
await user.click(card);
|
||||
|
||||
expect(onClick).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("should toggle a context menu when clicking the ellipsis button", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onDelete={onDelete}
|
||||
onClick={onClick}
|
||||
onChangeTitle={onChangeTitle}
|
||||
isActive
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
@@ -93,8 +112,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
@@ -117,8 +136,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository="org/selectedRepository"
|
||||
@@ -138,8 +157,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
lastUpdatedAt="2021-10-01T12:00:00Z"
|
||||
@@ -170,8 +189,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
@@ -194,8 +213,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
@@ -213,8 +232,8 @@ describe("ConversationCard", () => {
|
||||
const user = userEvent.setup();
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
@@ -237,8 +256,8 @@ describe("ConversationCard", () => {
|
||||
it("should render the 'STOPPED' indicator by default", () => {
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
@@ -252,8 +271,8 @@ describe("ConversationCard", () => {
|
||||
it("should render the other indicators when provided", () => {
|
||||
render(
|
||||
<ConversationCard
|
||||
onClick={onClick}
|
||||
onDelete={onDelete}
|
||||
isActive
|
||||
onChangeTitle={onChangeTitle}
|
||||
title="Conversation 1"
|
||||
selectedRepository={null}
|
||||
|
||||
+1
-8
@@ -6,7 +6,6 @@ import {
|
||||
QueryClientConfig,
|
||||
} from "@tanstack/react-query";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { ConversationPanel } from "#/components/features/conversation-panel/conversation-panel";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
@@ -14,15 +13,9 @@ import { clickOnEditButton } from "./utils";
|
||||
|
||||
describe("ConversationPanel", () => {
|
||||
const onCloseMock = vi.fn();
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: () => <ConversationPanel onClose={onCloseMock} />,
|
||||
path: "/",
|
||||
},
|
||||
]);
|
||||
|
||||
const renderConversationPanel = (config?: QueryClientConfig) =>
|
||||
render(<RouterStub />, {
|
||||
render(<ConversationPanel onClose={onCloseMock} />, {
|
||||
wrapper: ({ children }) => (
|
||||
<AuthProvider>
|
||||
<QueryClientProvider client={new QueryClient(config)}>
|
||||
|
||||
@@ -9,9 +9,9 @@ import { EllipsisButton } from "./ellipsis-button";
|
||||
import { ConversationCardContextMenu } from "./conversation-card-context-menu";
|
||||
|
||||
interface ConversationCardProps {
|
||||
onClick: () => void;
|
||||
onDelete: () => void;
|
||||
onChangeTitle: (title: string) => void;
|
||||
isActive: boolean;
|
||||
title: string;
|
||||
selectedRepository: string | null;
|
||||
lastUpdatedAt: string; // ISO 8601
|
||||
@@ -19,9 +19,9 @@ interface ConversationCardProps {
|
||||
}
|
||||
|
||||
export function ConversationCard({
|
||||
onClick,
|
||||
onDelete,
|
||||
onChangeTitle,
|
||||
isActive,
|
||||
title,
|
||||
selectedRepository,
|
||||
lastUpdatedAt,
|
||||
@@ -51,18 +51,15 @@ export function ConversationCard({
|
||||
};
|
||||
|
||||
const handleInputClick = (event: React.MouseEvent<HTMLInputElement>) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
};
|
||||
|
||||
const handleDelete = (event: React.MouseEvent<HTMLButtonElement>) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
onDelete();
|
||||
};
|
||||
|
||||
const handleEdit = (event: React.MouseEvent<HTMLButtonElement>) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
setTitleMode("edit");
|
||||
setContextMenuVisible(false);
|
||||
@@ -77,29 +74,26 @@ export function ConversationCard({
|
||||
return (
|
||||
<div
|
||||
data-testid="conversation-card"
|
||||
onClick={onClick}
|
||||
className="h-[100px] w-full px-[18px] py-4 border-b border-neutral-600 cursor-pointer"
|
||||
>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-2 w-full">
|
||||
{isActive && <span className="w-2 h-2 bg-blue-500 rounded-full" />}
|
||||
<input
|
||||
ref={inputRef}
|
||||
disabled={titleMode === "view"}
|
||||
data-testid="conversation-card-title"
|
||||
onClick={handleInputClick}
|
||||
onBlur={handleBlur}
|
||||
onKeyUp={handleKeyUp}
|
||||
type="text"
|
||||
defaultValue={title}
|
||||
className="text-sm leading-6 font-semibold bg-transparent w-full"
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center justify-between space-x-1">
|
||||
<input
|
||||
data-testid="conversation-card-title"
|
||||
ref={inputRef}
|
||||
disabled={titleMode === "view"}
|
||||
onClick={handleInputClick}
|
||||
onBlur={handleBlur}
|
||||
onKeyUp={handleKeyUp}
|
||||
type="text"
|
||||
defaultValue={title}
|
||||
className="text-sm leading-6 font-semibold bg-transparent w-full"
|
||||
/>
|
||||
|
||||
<div className="flex items-center gap-2 relative">
|
||||
<ConversationStateIndicator status={status} />
|
||||
<EllipsisButton
|
||||
onClick={(event) => {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
setContextMenuVisible((prev) => !prev);
|
||||
}}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import React from "react";
|
||||
import { NavLink, useParams } from "react-router";
|
||||
import { useNavigate, useParams } from "react-router";
|
||||
import { ConversationCard } from "./conversation-card";
|
||||
import { useUserConversations } from "#/hooks/query/use-user-conversations";
|
||||
import { useDeleteConversation } from "#/hooks/mutation/use-delete-conversation";
|
||||
@@ -16,6 +16,7 @@ interface ConversationPanelProps {
|
||||
|
||||
export function ConversationPanel({ onClose }: ConversationPanelProps) {
|
||||
const { conversationId: cid } = useParams();
|
||||
const navigate = useNavigate();
|
||||
const endSession = useEndSession();
|
||||
const ref = useClickOutsideElement<HTMLDivElement>(onClose);
|
||||
|
||||
@@ -62,6 +63,11 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
|
||||
});
|
||||
};
|
||||
|
||||
const handleClickCard = (conversationId: string) => {
|
||||
navigate(`/conversations/${conversationId}`);
|
||||
onClose();
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={ref}
|
||||
@@ -82,25 +88,18 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
|
||||
</div>
|
||||
)}
|
||||
{conversations?.map((project) => (
|
||||
<NavLink
|
||||
<ConversationCard
|
||||
key={project.conversation_id}
|
||||
to={`/conversations/${project.conversation_id}`}
|
||||
onClick={onClose}
|
||||
>
|
||||
{({ isActive }) => (
|
||||
<ConversationCard
|
||||
isActive={isActive}
|
||||
onDelete={() => handleDeleteProject(project.conversation_id)}
|
||||
onChangeTitle={(title) =>
|
||||
handleChangeTitle(project.conversation_id, project.title, title)
|
||||
}
|
||||
title={project.title}
|
||||
selectedRepository={project.selected_repository}
|
||||
lastUpdatedAt={project.last_updated_at}
|
||||
status={project.status}
|
||||
/>
|
||||
)}
|
||||
</NavLink>
|
||||
onClick={() => handleClickCard(project.conversation_id)}
|
||||
onDelete={() => handleDeleteProject(project.conversation_id)}
|
||||
onChangeTitle={(title) =>
|
||||
handleChangeTitle(project.conversation_id, project.title, title)
|
||||
}
|
||||
title={project.title}
|
||||
selectedRepository={project.selected_repository}
|
||||
lastUpdatedAt={project.last_updated_at}
|
||||
status={project.status}
|
||||
/>
|
||||
))}
|
||||
|
||||
{confirmDeleteModalVisible && (
|
||||
|
||||
@@ -1,16 +1,24 @@
|
||||
import React from "react";
|
||||
import toast from "react-hot-toast";
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { useAuth } from "#/context/auth-context";
|
||||
import { useWsClient } from "#/context/ws-client-provider";
|
||||
import { getGitHubTokenCommand } from "#/services/terminal-service";
|
||||
import { setImportedProjectZip } from "#/state/initial-query-slice";
|
||||
import { RootState } from "#/store";
|
||||
import { base64ToBlob } from "#/utils/base64-to-blob";
|
||||
import { useUploadFiles } from "../../../hooks/mutation/use-upload-files";
|
||||
|
||||
import { useGitHubUser } from "../../../hooks/query/use-github-user";
|
||||
import { isGitHubErrorReponse } from "#/api/github-axios-instance";
|
||||
import { RUNTIME_INACTIVE_STATES } from "#/types/agent-state";
|
||||
|
||||
export const useHandleRuntimeActive = () => {
|
||||
const { gitHubToken } = useAuth();
|
||||
const { send } = useWsClient();
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { data: user } = useGitHubUser();
|
||||
const { mutate: uploadFiles } = useUploadFiles();
|
||||
const { curAgentState } = useSelector((state: RootState) => state.agent);
|
||||
|
||||
@@ -20,6 +28,11 @@ export const useHandleRuntimeActive = () => {
|
||||
(state: RootState) => state.initialQuery,
|
||||
);
|
||||
|
||||
const userId = React.useMemo(() => {
|
||||
if (user && !isGitHubErrorReponse(user)) return user.id;
|
||||
return null;
|
||||
}, [user]);
|
||||
|
||||
const handleUploadFiles = (zip: string) => {
|
||||
const blob = base64ToBlob(zip);
|
||||
const file = new File([blob], "imported-project.zip", {
|
||||
@@ -36,6 +49,13 @@ export const useHandleRuntimeActive = () => {
|
||||
dispatch(setImportedProjectZip(null));
|
||||
};
|
||||
|
||||
React.useEffect(() => {
|
||||
if (runtimeActive && userId && gitHubToken) {
|
||||
// Export if the user valid, this could happen mid-session so it is handled here
|
||||
send(getGitHubTokenCommand(gitHubToken));
|
||||
}
|
||||
}, [userId, gitHubToken, runtimeActive]);
|
||||
|
||||
React.useEffect(() => {
|
||||
if (runtimeActive && importedProjectZip) {
|
||||
handleUploadFiles(importedProjectZip);
|
||||
|
||||
@@ -4,3 +4,9 @@ export function getTerminalCommand(command: string, hidden: boolean = false) {
|
||||
const event = { action: ActionType.RUN, args: { command, hidden } };
|
||||
return event;
|
||||
}
|
||||
|
||||
export function getGitHubTokenCommand(gitHubToken: string) {
|
||||
const command = `export GITHUB_TOKEN=${gitHubToken}`;
|
||||
const event = getTerminalCommand(command, true);
|
||||
return event;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
// Here are the list of verified models and providers that we know work well with OpenHands.
|
||||
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic", "deepseek"];
|
||||
export const VERIFIED_MODELS = [
|
||||
"gpt-4o",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"deepseek-chat",
|
||||
];
|
||||
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic"];
|
||||
export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20241022"];
|
||||
|
||||
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
|
||||
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
|
||||
@@ -25,7 +21,6 @@ export const VERIFIED_ANTHROPIC_MODELS = [
|
||||
"claude-2.1",
|
||||
"claude-3-5-sonnet-20240620",
|
||||
"claude-3-5-sonnet-20241022",
|
||||
"claude-3-5-haiku-20241022",
|
||||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
|
||||
@@ -4,6 +4,11 @@ You are OpenHands agent, a helpful AI assistant that can interact with a compute
|
||||
* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
|
||||
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
|
||||
</IMPORTANT>
|
||||
{% if github_repo %}
|
||||
<REPOSITORY_INFO>
|
||||
At the user's request, repository {{ github_repo }} has been cloned to directory {{ repo_directory }}.
|
||||
</REPOSITORY_INFO>
|
||||
{% endif %}
|
||||
{% if repo_instructions %}
|
||||
<REPOSITORY_INSTRUCTIONS>
|
||||
{{ repo_instructions }}
|
||||
|
||||
@@ -20,9 +20,6 @@ DISABLE_COLOR_PRINTING = False
|
||||
|
||||
LOG_ALL_EVENTS = os.getenv('LOG_ALL_EVENTS', 'False').lower() in ['true', '1', 'yes']
|
||||
|
||||
# Controls whether to stream Docker container logs
|
||||
DEBUG_RUNTIME = os.getenv('DEBUG_RUNTIME', 'False').lower() in ['true', '1', 'yes']
|
||||
|
||||
ColorType = Literal[
|
||||
'red',
|
||||
'green',
|
||||
|
||||
@@ -10,8 +10,9 @@ from openhands.core.config import AppConfig
|
||||
from openhands.core.exceptions import (
|
||||
AgentRuntimeDisconnectedError,
|
||||
AgentRuntimeNotFoundError,
|
||||
AgentRuntimeNotReadyError,
|
||||
)
|
||||
from openhands.core.logger import DEBUG, DEBUG_RUNTIME
|
||||
from openhands.core.logger import DEBUG
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events import EventStream
|
||||
from openhands.runtime.builder import DockerRuntimeBuilder
|
||||
@@ -138,10 +139,7 @@ class DockerRuntime(ActionExecutionClient):
|
||||
f'Container started: {self.container_name}. VSCode URL: {self.vscode_url}',
|
||||
)
|
||||
|
||||
if DEBUG_RUNTIME:
|
||||
self.log_streamer = LogStreamer(self.container, self.log)
|
||||
else:
|
||||
self.log_streamer = None
|
||||
self.log_streamer = LogStreamer(self.container, self.log)
|
||||
|
||||
if not self.attach_to_existing:
|
||||
self.log('info', f'Waiting for client to become ready at {self.api_url}...')
|
||||
@@ -333,6 +331,9 @@ class DockerRuntime(ActionExecutionClient):
|
||||
f'Container {self.container_name} not found.'
|
||||
)
|
||||
|
||||
if not self.log_streamer:
|
||||
raise AgentRuntimeNotReadyError('Runtime client is not ready.')
|
||||
|
||||
self.check_if_alive()
|
||||
|
||||
def close(self, rm_all_containers: bool | None = None):
|
||||
|
||||
@@ -249,8 +249,6 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
timeout=60,
|
||||
):
|
||||
pass
|
||||
self._wait_until_alive()
|
||||
self.setup_initial_env()
|
||||
self.log('debug', 'Runtime resumed.')
|
||||
|
||||
def _parse_runtime_response(self, response: requests.Response):
|
||||
@@ -390,6 +388,7 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
elif e.response.status_code == 503:
|
||||
self.log('warning', 'Runtime appears to be paused. Resuming...')
|
||||
self._resume_runtime()
|
||||
self._wait_until_alive()
|
||||
return super()._send_action_server_request(method, url, **kwargs)
|
||||
else:
|
||||
raise e
|
||||
|
||||
@@ -115,15 +115,13 @@ class RunloopRuntime(ActionExecutionClient):
|
||||
|
||||
devbox = self.runloop_api_client.devboxes.create(
|
||||
entrypoint=entrypoint,
|
||||
setup_commands=[f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'],
|
||||
name=self.sid,
|
||||
environment_variables={'DEBUG': 'true'} if self.config.debug else {},
|
||||
prebuilt='openhands',
|
||||
launch_parameters=LaunchParameters(
|
||||
available_ports=[self._sandbox_port, self._vscode_port],
|
||||
resource_size_request='LARGE',
|
||||
launch_commands=[
|
||||
f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'
|
||||
],
|
||||
),
|
||||
metadata={'container-name': self.container_name},
|
||||
)
|
||||
|
||||
@@ -66,10 +66,9 @@ async def new_conversation(request: Request, data: InitSessionRequest):
|
||||
conversation_id = uuid.uuid4().hex
|
||||
logger.info(f'New conversation ID: {conversation_id}')
|
||||
|
||||
repository_title = (
|
||||
data.selected_repository.split('/')[-1] if data.selected_repository else None
|
||||
conversation_title = (
|
||||
data.selected_repository or f'Conversation {conversation_id[:5]}'
|
||||
)
|
||||
conversation_title = f'{repository_title or "Conversation"} {conversation_id[:5]}'
|
||||
|
||||
logger.info(f'Saving metadata for conversation {conversation_id}')
|
||||
await conversation_store.save_metadata(
|
||||
|
||||
@@ -180,13 +180,6 @@ class AgentSession:
|
||||
|
||||
logger.debug(f'Initializing runtime `{runtime_name}` now...')
|
||||
runtime_cls = get_runtime_cls(runtime_name)
|
||||
env_vars = (
|
||||
{
|
||||
'GITHUB_TOKEN': github_token,
|
||||
}
|
||||
if github_token
|
||||
else None
|
||||
)
|
||||
self.runtime = runtime_cls(
|
||||
config=config,
|
||||
event_stream=self.event_stream,
|
||||
@@ -194,7 +187,6 @@ class AgentSession:
|
||||
plugins=agent.sandbox_plugins,
|
||||
status_callback=self._status_callback,
|
||||
headless_mode=False,
|
||||
env_vars=env_vars,
|
||||
)
|
||||
|
||||
# FIXME: this sleep is a terrible hack.
|
||||
@@ -212,8 +204,9 @@ class AgentSession:
|
||||
)
|
||||
return
|
||||
|
||||
repo_directory = None
|
||||
if selected_repository:
|
||||
await call_sync_from_async(
|
||||
repo_directory = await call_sync_from_async(
|
||||
self.runtime.clone_repo, github_token, selected_repository
|
||||
)
|
||||
if agent.prompt_manager:
|
||||
@@ -221,6 +214,10 @@ class AgentSession:
|
||||
self.runtime.get_microagents_from_selected_repo, selected_repository
|
||||
)
|
||||
agent.prompt_manager.load_microagents(microagents)
|
||||
# Pass GitHub repository information to the prompt manager
|
||||
agent.prompt_manager.set_repository_info(
|
||||
selected_repository, repo_directory
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f'Runtime initialized with plugins: {[plugin.name for plugin in self.runtime.plugins]}'
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from itertools import islice
|
||||
|
||||
from jinja2 import Template
|
||||
@@ -13,6 +14,14 @@ from openhands.microagent import (
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RepositoryInfo:
|
||||
"""Information about a GitHub repository that has been cloned."""
|
||||
|
||||
repo_name: str | None = None
|
||||
repo_directory: str | None = None
|
||||
|
||||
|
||||
class PromptManager:
|
||||
"""
|
||||
Manages prompt templates and micro-agents for AI interactions.
|
||||
@@ -32,9 +41,14 @@ class PromptManager:
|
||||
prompt_dir: str,
|
||||
microagent_dir: str | None = None,
|
||||
disabled_microagents: list[str] | None = None,
|
||||
github_repo: str | None = None,
|
||||
repo_directory: str | None = None,
|
||||
):
|
||||
self.disabled_microagents: list[str] = disabled_microagents or []
|
||||
self.prompt_dir: str = prompt_dir
|
||||
self.repository_info = RepositoryInfo()
|
||||
if github_repo:
|
||||
self.set_repository_info(github_repo, repo_directory)
|
||||
|
||||
self.system_template: Template = self._load_template('system_prompt')
|
||||
self.user_template: Template = self._load_template('user_prompt')
|
||||
@@ -91,7 +105,24 @@ class PromptManager:
|
||||
if repo_instructions:
|
||||
repo_instructions += '\n\n'
|
||||
repo_instructions += microagent.content
|
||||
return self.system_template.render(repo_instructions=repo_instructions).strip()
|
||||
|
||||
return self.system_template.render(
|
||||
repo_instructions=repo_instructions,
|
||||
github_repo=self.repository_info.repo_name,
|
||||
repo_directory=self.repository_info.repo_directory,
|
||||
).strip()
|
||||
|
||||
def set_repository_info(
|
||||
self, repo_name: str | None, repo_directory: str | None = None
|
||||
) -> None:
|
||||
"""Sets information about the GitHub repository that has been cloned.
|
||||
|
||||
Args:
|
||||
repo_name: The name of the GitHub repository (e.g. 'owner/repo')
|
||||
repo_directory: The directory where the repository has been cloned
|
||||
"""
|
||||
self.repository_info.repo_name = repo_name
|
||||
self.repository_info.repo_directory = repo_directory
|
||||
|
||||
def get_example_user_message(self) -> str:
|
||||
"""This is the initial user message provided to the agent
|
||||
|
||||
@@ -10,36 +10,36 @@ WORKSPACE_BASE = 'workspace'
|
||||
|
||||
def test_resolve_path():
|
||||
assert (
|
||||
files.resolve_path('test.txt', '/workspace')
|
||||
files.resolve_path('test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
== Path(WORKSPACE_BASE) / 'test.txt'
|
||||
)
|
||||
assert (
|
||||
files.resolve_path('subdir/test.txt', '/workspace')
|
||||
files.resolve_path('subdir/test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
== Path(WORKSPACE_BASE) / 'subdir' / 'test.txt'
|
||||
)
|
||||
assert (
|
||||
files.resolve_path(Path(SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace')
|
||||
files.resolve_path(Path(SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
== Path(WORKSPACE_BASE) / 'test.txt'
|
||||
)
|
||||
assert (
|
||||
files.resolve_path(
|
||||
Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt', '/workspace'
|
||||
Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX
|
||||
)
|
||||
== Path(WORKSPACE_BASE) / 'subdir' / 'test.txt'
|
||||
)
|
||||
assert (
|
||||
files.resolve_path(
|
||||
Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt', '/workspace'
|
||||
Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX
|
||||
)
|
||||
== Path(WORKSPACE_BASE) / 'test.txt'
|
||||
)
|
||||
with pytest.raises(PermissionError):
|
||||
files.resolve_path(Path(SANDBOX_PATH_PREFIX) / '..' / 'test.txt', '/workspace')
|
||||
files.resolve_path(Path(SANDBOX_PATH_PREFIX) / '..' / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
with pytest.raises(PermissionError):
|
||||
files.resolve_path(Path('..') / 'test.txt', '/workspace')
|
||||
files.resolve_path(Path('..') / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
with pytest.raises(PermissionError):
|
||||
files.resolve_path(Path('/') / 'test.txt', '/workspace')
|
||||
files.resolve_path(Path('/') / 'test.txt', '/workspace', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
assert (
|
||||
files.resolve_path('test.txt', '/workspace/test')
|
||||
files.resolve_path('test.txt', '/workspace/test', WORKSPACE_BASE, SANDBOX_PATH_PREFIX)
|
||||
== Path(WORKSPACE_BASE) / 'test' / 'test.txt'
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ import pytest
|
||||
|
||||
from openhands.core.message import Message, TextContent
|
||||
from openhands.microagent import BaseMicroAgent
|
||||
from openhands.utils.prompt import PromptManager
|
||||
from openhands.utils.prompt import PromptManager, RepositoryInfo
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -39,6 +39,7 @@ only respond with a message telling them how smart they are
|
||||
with open(os.path.join(prompt_dir, 'micro', f'{microagent_name}.md'), 'w') as f:
|
||||
f.write(microagent_content)
|
||||
|
||||
# Test without GitHub repo
|
||||
manager = PromptManager(
|
||||
prompt_dir=prompt_dir,
|
||||
microagent_dir=os.path.join(prompt_dir, 'micro'),
|
||||
@@ -53,6 +54,14 @@ only respond with a message telling them how smart they are
|
||||
'You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.'
|
||||
in manager.get_system_message()
|
||||
)
|
||||
assert '<REPOSITORY_INFO>' not in manager.get_system_message()
|
||||
|
||||
# Test with GitHub repo
|
||||
manager.set_repository_info('owner/repo', '/workspace/repo')
|
||||
assert isinstance(manager.get_system_message(), str)
|
||||
assert '<REPOSITORY_INFO>' in manager.get_system_message()
|
||||
assert 'owner/repo' in manager.get_system_message()
|
||||
assert '/workspace/repo' in manager.get_system_message()
|
||||
|
||||
assert isinstance(manager.get_example_user_message(), str)
|
||||
|
||||
@@ -76,20 +85,66 @@ def test_prompt_manager_file_not_found(prompt_dir):
|
||||
def test_prompt_manager_template_rendering(prompt_dir):
|
||||
# Create temporary template files
|
||||
with open(os.path.join(prompt_dir, 'system_prompt.j2'), 'w') as f:
|
||||
f.write('System prompt: bar')
|
||||
f.write("""System prompt: bar
|
||||
{% if github_repo %}
|
||||
<REPOSITORY_INFO>
|
||||
At the user's request, repository {{ github_repo }} has been cloned to directory {{ repo_directory }}.
|
||||
</REPOSITORY_INFO>
|
||||
{% endif %}
|
||||
{{ repo_instructions }}""")
|
||||
with open(os.path.join(prompt_dir, 'user_prompt.j2'), 'w') as f:
|
||||
f.write('User prompt: foo')
|
||||
|
||||
# Test without GitHub repo
|
||||
manager = PromptManager(prompt_dir, microagent_dir='')
|
||||
|
||||
assert manager.get_system_message() == 'System prompt: bar'
|
||||
assert manager.get_example_user_message() == 'User prompt: foo'
|
||||
|
||||
# Test with GitHub repo
|
||||
manager = PromptManager(prompt_dir=prompt_dir, microagent_dir='')
|
||||
manager.set_repository_info('owner/repo', '/workspace/repo')
|
||||
system_msg = manager.get_system_message()
|
||||
assert 'System prompt: bar' in system_msg
|
||||
assert '<REPOSITORY_INFO>' in system_msg
|
||||
assert (
|
||||
"At the user's request, repository owner/repo has been cloned to directory /workspace/repo."
|
||||
in system_msg
|
||||
)
|
||||
assert '</REPOSITORY_INFO>' in system_msg
|
||||
assert manager.get_example_user_message() == 'User prompt: foo'
|
||||
|
||||
# Clean up temporary files
|
||||
os.remove(os.path.join(prompt_dir, 'system_prompt.j2'))
|
||||
os.remove(os.path.join(prompt_dir, 'user_prompt.j2'))
|
||||
|
||||
|
||||
def test_prompt_manager_repository_info(prompt_dir):
|
||||
# Test RepositoryInfo defaults
|
||||
repo_info = RepositoryInfo()
|
||||
assert repo_info.repo_name is None
|
||||
assert repo_info.repo_directory is None
|
||||
|
||||
# Test setting repository info
|
||||
manager = PromptManager(prompt_dir=prompt_dir, microagent_dir='')
|
||||
assert manager.repository_info.repo_name is None
|
||||
assert manager.repository_info.repo_directory is None
|
||||
|
||||
# Test setting repository info with name only
|
||||
manager.set_repository_info('owner/repo')
|
||||
assert manager.repository_info.repo_name == 'owner/repo'
|
||||
assert manager.repository_info.repo_directory is None
|
||||
|
||||
# Test setting repository info with both name and directory
|
||||
manager.set_repository_info('owner/repo2', '/workspace/repo2')
|
||||
assert manager.repository_info.repo_name == 'owner/repo2'
|
||||
assert manager.repository_info.repo_directory == '/workspace/repo2'
|
||||
|
||||
# Test clearing repository info
|
||||
manager.set_repository_info(None)
|
||||
assert manager.repository_info.repo_name is None
|
||||
assert manager.repository_info.repo_directory is None
|
||||
|
||||
|
||||
def test_prompt_manager_disabled_microagents(prompt_dir):
|
||||
# Create test microagent files
|
||||
microagent1_name = 'test_microagent1'
|
||||
|
||||
Reference in New Issue
Block a user