Compare commits

...

8 Commits

Author SHA1 Message Date
openhands
b19476a6c5 fix: Pass base commit to send_pull_request to ensure consistent repository state 2025-01-09 05:37:11 +00:00
Graham Neubig
5458ebbd7d Fix issue #6048: Update documentation of recommended models and add deepseek (#6050)
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2025-01-09 02:39:53 +00:00
Robert Brennan
c411a29db4 Move GitHub Token export to backend (#6153)
Co-authored-by: openhands <openhands@all-hands.dev>
2025-01-08 21:12:46 +00:00
ross
386e04a2ba Fix field deprecation in runloop runtime client (#6152) 2025-01-08 15:18:24 -05:00
sp.wack
62c4bab6ba hotfix(frontend): Prevent a redirect when clicking edit (#6151) 2025-01-08 19:53:24 +00:00
sp.wack
e308b6fb6f chore(backend): Update default conversation title logic (#6138) 2025-01-08 22:30:29 +04:00
Engel Nyst
27a660fb6b Make runtime logs optional (#6141) 2025-01-08 19:20:46 +01:00
sp.wack
27d761a1fe chore(frontend): Improve conversation card (#6121) 2025-01-08 21:57:57 +04:00
15 changed files with 103 additions and 121 deletions

View File

@@ -259,16 +259,19 @@ jobs:
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
PYTHONPATH: ""
run: |
cd /tmp && BASE_COMMIT=$(cd repo && git rev-parse HEAD) && \
if [ "${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}" == "true" ]; then
cd /tmp && python -m openhands.resolver.send_pull_request \
python -m openhands.resolver.send_pull_request \
--issue-number ${{ env.ISSUE_NUMBER }} \
--pr-type draft \
--reviewer ${{ github.actor }} | tee pr_result.txt && \
--reviewer ${{ github.actor }} \
--base-commit "$BASE_COMMIT" | tee pr_result.txt && \
grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
else
cd /tmp && python -m openhands.resolver.send_pull_request \
python -m openhands.resolver.send_pull_request \
--issue-number ${{ env.ISSUE_NUMBER }} \
--pr-type branch \
--base-commit "$BASE_COMMIT" \
--send-on-failure | tee branch_result.txt && \
grep "branch created" branch_result.txt | sed 's/.*\///g; s/.expand=1//g' > branch_name.txt
fi

View File

@@ -5,23 +5,14 @@ OpenHands can connect to any LLM supported by LiteLLM. However, it requires a po
## Model Recommendations
Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some
recommendations for model selection. Some analyses can be found in [this blog article comparing LLMs](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) and
[this blog article with some more recent results](https://www.all-hands.dev/blog/openhands-codeact-21-an-open-state-of-the-art-software-development-agent).
When choosing a model, consider both the quality of outputs and the associated costs. Here's a summary of the findings:
- Claude 3.5 Sonnet is the best by a fair amount, achieving a 53% resolve rate on SWE-Bench Verified with the default agent in OpenHands.
- GPT-4o lags behind, and o1-mini actually performed somewhat worse than GPT-4o. We went in and analyzed the results a little, and briefly it seemed like o1 was sometimes "overthinking" things, performing extra environment configuration tasks when it could just go ahead and finish the task.
- Finally, the strongest open models were Llama 3.1 405 B and deepseek-v2.5, and they performed reasonably, even besting some of the closed models.
Please refer to the [full article](https://www.all-hands.dev/blog/evaluation-of-llms-as-coding-agents-on-swe-bench-at-30x-speed) for more details.
recommendations for model selection. Our latest benchmarking results can be found in [this spreadsheet](https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0).
Based on these findings and community feedback, the following models have been verified to work reasonably well with OpenHands:
- claude-3-5-sonnet (recommended)
- gpt-4 / gpt-4o
- llama-3.1-405b
- deepseek-v2.5
- anthropic/claude-3-5-sonnet-20241022 (recommended)
- anthropic/claude-3-5-haiku-20241022
- deepseek/deepseek-chat
- gpt-4o
:::warning
OpenHands will issue many prompts to the LLM you configure. Most of these LLMs cost money, so be sure to set spending

View File

@@ -18,8 +18,8 @@ describe("ConversationCard", () => {
render(
<ConversationCard
onDelete={onDelete}
onClick={onClick}
onChangeTitle={onChangeTitle}
isActive
title="Conversation 1"
selectedRepository={null}
lastUpdatedAt="2021-10-01T12:00:00Z"
@@ -38,8 +38,8 @@ describe("ConversationCard", () => {
const { rerender } = render(
<ConversationCard
onDelete={onDelete}
onClick={onClick}
onChangeTitle={onChangeTitle}
isActive
title="Conversation 1"
selectedRepository={null}
lastUpdatedAt="2021-10-01T12:00:00Z"
@@ -53,8 +53,8 @@ describe("ConversationCard", () => {
rerender(
<ConversationCard
onDelete={onDelete}
onClick={onClick}
onChangeTitle={onChangeTitle}
isActive
title="Conversation 1"
selectedRepository="org/selectedRepository"
lastUpdatedAt="2021-10-01T12:00:00Z"
@@ -64,32 +64,13 @@ describe("ConversationCard", () => {
screen.getByTestId("conversation-card-selected-repository");
});
it("should call onClick when the card is clicked", async () => {
const user = userEvent.setup();
render(
<ConversationCard
onDelete={onDelete}
onClick={onClick}
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
lastUpdatedAt="2021-10-01T12:00:00Z"
/>,
);
const card = screen.getByTestId("conversation-card");
await user.click(card);
expect(onClick).toHaveBeenCalled();
});
it("should toggle a context menu when clicking the ellipsis button", async () => {
const user = userEvent.setup();
render(
<ConversationCard
onDelete={onDelete}
onClick={onClick}
onChangeTitle={onChangeTitle}
isActive
title="Conversation 1"
selectedRepository={null}
lastUpdatedAt="2021-10-01T12:00:00Z"
@@ -112,8 +93,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
@@ -136,8 +117,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository="org/selectedRepository"
@@ -157,8 +138,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
title="Conversation 1"
selectedRepository={null}
lastUpdatedAt="2021-10-01T12:00:00Z"
@@ -189,8 +170,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
@@ -213,8 +194,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
@@ -232,8 +213,8 @@ describe("ConversationCard", () => {
const user = userEvent.setup();
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
@@ -256,8 +237,8 @@ describe("ConversationCard", () => {
it("should render the 'STOPPED' indicator by default", () => {
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}
@@ -271,8 +252,8 @@ describe("ConversationCard", () => {
it("should render the other indicators when provided", () => {
render(
<ConversationCard
onClick={onClick}
onDelete={onDelete}
isActive
onChangeTitle={onChangeTitle}
title="Conversation 1"
selectedRepository={null}

View File

@@ -6,6 +6,7 @@ import {
QueryClientConfig,
} from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { createRoutesStub } from "react-router";
import { ConversationPanel } from "#/components/features/conversation-panel/conversation-panel";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
@@ -13,9 +14,15 @@ import { clickOnEditButton } from "./utils";
describe("ConversationPanel", () => {
const onCloseMock = vi.fn();
const RouterStub = createRoutesStub([
{
Component: () => <ConversationPanel onClose={onCloseMock} />,
path: "/",
},
]);
const renderConversationPanel = (config?: QueryClientConfig) =>
render(<ConversationPanel onClose={onCloseMock} />, {
render(<RouterStub />, {
wrapper: ({ children }) => (
<AuthProvider>
<QueryClientProvider client={new QueryClient(config)}>

View File

@@ -9,9 +9,9 @@ import { EllipsisButton } from "./ellipsis-button";
import { ConversationCardContextMenu } from "./conversation-card-context-menu";
interface ConversationCardProps {
onClick: () => void;
onDelete: () => void;
onChangeTitle: (title: string) => void;
isActive: boolean;
title: string;
selectedRepository: string | null;
lastUpdatedAt: string; // ISO 8601
@@ -19,9 +19,9 @@ interface ConversationCardProps {
}
export function ConversationCard({
onClick,
onDelete,
onChangeTitle,
isActive,
title,
selectedRepository,
lastUpdatedAt,
@@ -51,15 +51,18 @@ export function ConversationCard({
};
const handleInputClick = (event: React.MouseEvent<HTMLInputElement>) => {
event.preventDefault();
event.stopPropagation();
};
const handleDelete = (event: React.MouseEvent<HTMLButtonElement>) => {
event.preventDefault();
event.stopPropagation();
onDelete();
};
const handleEdit = (event: React.MouseEvent<HTMLButtonElement>) => {
event.preventDefault();
event.stopPropagation();
setTitleMode("edit");
setContextMenuVisible(false);
@@ -74,26 +77,29 @@ export function ConversationCard({
return (
<div
data-testid="conversation-card"
onClick={onClick}
className="h-[100px] w-full px-[18px] py-4 border-b border-neutral-600 cursor-pointer"
>
<div className="flex items-center justify-between space-x-1">
<input
data-testid="conversation-card-title"
ref={inputRef}
disabled={titleMode === "view"}
onClick={handleInputClick}
onBlur={handleBlur}
onKeyUp={handleKeyUp}
type="text"
defaultValue={title}
className="text-sm leading-6 font-semibold bg-transparent w-full"
/>
<div className="flex items-center justify-between">
<div className="flex items-center gap-2 w-full">
{isActive && <span className="w-2 h-2 bg-blue-500 rounded-full" />}
<input
ref={inputRef}
disabled={titleMode === "view"}
data-testid="conversation-card-title"
onClick={handleInputClick}
onBlur={handleBlur}
onKeyUp={handleKeyUp}
type="text"
defaultValue={title}
className="text-sm leading-6 font-semibold bg-transparent w-full"
/>
</div>
<div className="flex items-center gap-2 relative">
<ConversationStateIndicator status={status} />
<EllipsisButton
onClick={(event) => {
event.preventDefault();
event.stopPropagation();
setContextMenuVisible((prev) => !prev);
}}

View File

@@ -1,5 +1,5 @@
import React from "react";
import { useNavigate, useParams } from "react-router";
import { NavLink, useParams } from "react-router";
import { ConversationCard } from "./conversation-card";
import { useUserConversations } from "#/hooks/query/use-user-conversations";
import { useDeleteConversation } from "#/hooks/mutation/use-delete-conversation";
@@ -16,7 +16,6 @@ interface ConversationPanelProps {
export function ConversationPanel({ onClose }: ConversationPanelProps) {
const { conversationId: cid } = useParams();
const navigate = useNavigate();
const endSession = useEndSession();
const ref = useClickOutsideElement<HTMLDivElement>(onClose);
@@ -63,11 +62,6 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
});
};
const handleClickCard = (conversationId: string) => {
navigate(`/conversations/${conversationId}`);
onClose();
};
return (
<div
ref={ref}
@@ -88,18 +82,25 @@ export function ConversationPanel({ onClose }: ConversationPanelProps) {
</div>
)}
{conversations?.map((project) => (
<ConversationCard
<NavLink
key={project.conversation_id}
onClick={() => handleClickCard(project.conversation_id)}
onDelete={() => handleDeleteProject(project.conversation_id)}
onChangeTitle={(title) =>
handleChangeTitle(project.conversation_id, project.title, title)
}
title={project.title}
selectedRepository={project.selected_repository}
lastUpdatedAt={project.last_updated_at}
status={project.status}
/>
to={`/conversations/${project.conversation_id}`}
onClick={onClose}
>
{({ isActive }) => (
<ConversationCard
isActive={isActive}
onDelete={() => handleDeleteProject(project.conversation_id)}
onChangeTitle={(title) =>
handleChangeTitle(project.conversation_id, project.title, title)
}
title={project.title}
selectedRepository={project.selected_repository}
lastUpdatedAt={project.last_updated_at}
status={project.status}
/>
)}
</NavLink>
))}
{confirmDeleteModalVisible && (

View File

@@ -1,24 +1,16 @@
import React from "react";
import toast from "react-hot-toast";
import { useDispatch, useSelector } from "react-redux";
import { useAuth } from "#/context/auth-context";
import { useWsClient } from "#/context/ws-client-provider";
import { getGitHubTokenCommand } from "#/services/terminal-service";
import { setImportedProjectZip } from "#/state/initial-query-slice";
import { RootState } from "#/store";
import { base64ToBlob } from "#/utils/base64-to-blob";
import { useUploadFiles } from "../../../hooks/mutation/use-upload-files";
import { useGitHubUser } from "../../../hooks/query/use-github-user";
import { isGitHubErrorReponse } from "#/api/github-axios-instance";
import { RUNTIME_INACTIVE_STATES } from "#/types/agent-state";
export const useHandleRuntimeActive = () => {
const { gitHubToken } = useAuth();
const { send } = useWsClient();
const dispatch = useDispatch();
const { data: user } = useGitHubUser();
const { mutate: uploadFiles } = useUploadFiles();
const { curAgentState } = useSelector((state: RootState) => state.agent);
@@ -28,11 +20,6 @@ export const useHandleRuntimeActive = () => {
(state: RootState) => state.initialQuery,
);
const userId = React.useMemo(() => {
if (user && !isGitHubErrorReponse(user)) return user.id;
return null;
}, [user]);
const handleUploadFiles = (zip: string) => {
const blob = base64ToBlob(zip);
const file = new File([blob], "imported-project.zip", {
@@ -49,13 +36,6 @@ export const useHandleRuntimeActive = () => {
dispatch(setImportedProjectZip(null));
};
React.useEffect(() => {
if (runtimeActive && userId && gitHubToken) {
// Export if the user valid, this could happen mid-session so it is handled here
send(getGitHubTokenCommand(gitHubToken));
}
}, [userId, gitHubToken, runtimeActive]);
React.useEffect(() => {
if (runtimeActive && importedProjectZip) {
handleUploadFiles(importedProjectZip);

View File

@@ -4,9 +4,3 @@ export function getTerminalCommand(command: string, hidden: boolean = false) {
const event = { action: ActionType.RUN, args: { command, hidden } };
return event;
}
export function getGitHubTokenCommand(gitHubToken: string) {
const command = `export GITHUB_TOKEN=${gitHubToken}`;
const event = getTerminalCommand(command, true);
return event;
}

View File

@@ -1,6 +1,10 @@
// Here are the list of verified models and providers that we know work well with OpenHands.
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic"];
export const VERIFIED_MODELS = ["gpt-4o", "claude-3-5-sonnet-20241022"];
export const VERIFIED_PROVIDERS = ["openai", "azure", "anthropic", "deepseek"];
export const VERIFIED_MODELS = [
"gpt-4o",
"claude-3-5-sonnet-20241022",
"deepseek-chat",
];
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
// (e.g., they return `gpt-4o` instead of `openai/gpt-4o`)
@@ -21,6 +25,7 @@ export const VERIFIED_ANTHROPIC_MODELS = [
"claude-2.1",
"claude-3-5-sonnet-20240620",
"claude-3-5-sonnet-20241022",
"claude-3-5-haiku-20241022",
"claude-3-haiku-20240307",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",

View File

@@ -20,6 +20,9 @@ DISABLE_COLOR_PRINTING = False
LOG_ALL_EVENTS = os.getenv('LOG_ALL_EVENTS', 'False').lower() in ['true', '1', 'yes']
# Controls whether to stream Docker container logs
DEBUG_RUNTIME = os.getenv('DEBUG_RUNTIME', 'False').lower() in ['true', '1', 'yes']
ColorType = Literal[
'red',
'green',

View File

@@ -10,9 +10,8 @@ from openhands.core.config import AppConfig
from openhands.core.exceptions import (
AgentRuntimeDisconnectedError,
AgentRuntimeNotFoundError,
AgentRuntimeNotReadyError,
)
from openhands.core.logger import DEBUG
from openhands.core.logger import DEBUG, DEBUG_RUNTIME
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.runtime.builder import DockerRuntimeBuilder
@@ -139,7 +138,10 @@ class DockerRuntime(ActionExecutionClient):
f'Container started: {self.container_name}. VSCode URL: {self.vscode_url}',
)
self.log_streamer = LogStreamer(self.container, self.log)
if DEBUG_RUNTIME:
self.log_streamer = LogStreamer(self.container, self.log)
else:
self.log_streamer = None
if not self.attach_to_existing:
self.log('info', f'Waiting for client to become ready at {self.api_url}...')
@@ -331,9 +333,6 @@ class DockerRuntime(ActionExecutionClient):
f'Container {self.container_name} not found.'
)
if not self.log_streamer:
raise AgentRuntimeNotReadyError('Runtime client is not ready.')
self.check_if_alive()
def close(self, rm_all_containers: bool | None = None):

View File

@@ -249,6 +249,8 @@ class RemoteRuntime(ActionExecutionClient):
timeout=60,
):
pass
self._wait_until_alive()
self.setup_initial_env()
self.log('debug', 'Runtime resumed.')
def _parse_runtime_response(self, response: requests.Response):
@@ -388,7 +390,6 @@ class RemoteRuntime(ActionExecutionClient):
elif e.response.status_code == 503:
self.log('warning', 'Runtime appears to be paused. Resuming...')
self._resume_runtime()
self._wait_until_alive()
return super()._send_action_server_request(method, url, **kwargs)
else:
raise e

View File

@@ -115,13 +115,15 @@ class RunloopRuntime(ActionExecutionClient):
devbox = self.runloop_api_client.devboxes.create(
entrypoint=entrypoint,
setup_commands=[f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'],
name=self.sid,
environment_variables={'DEBUG': 'true'} if self.config.debug else {},
prebuilt='openhands',
launch_parameters=LaunchParameters(
available_ports=[self._sandbox_port, self._vscode_port],
resource_size_request='LARGE',
launch_commands=[
f'mkdir -p {self.config.workspace_mount_path_in_sandbox}'
],
),
metadata={'container-name': self.container_name},
)

View File

@@ -66,9 +66,10 @@ async def new_conversation(request: Request, data: InitSessionRequest):
conversation_id = uuid.uuid4().hex
logger.info(f'New conversation ID: {conversation_id}')
conversation_title = (
data.selected_repository or f'Conversation {conversation_id[:5]}'
repository_title = (
data.selected_repository.split('/')[-1] if data.selected_repository else None
)
conversation_title = f'{repository_title or "Conversation"} {conversation_id[:5]}'
logger.info(f'Saving metadata for conversation {conversation_id}')
await conversation_store.save_metadata(

View File

@@ -180,6 +180,13 @@ class AgentSession:
logger.debug(f'Initializing runtime `{runtime_name}` now...')
runtime_cls = get_runtime_cls(runtime_name)
env_vars = (
{
'GITHUB_TOKEN': github_token,
}
if github_token
else None
)
self.runtime = runtime_cls(
config=config,
event_stream=self.event_stream,
@@ -187,6 +194,7 @@ class AgentSession:
plugins=agent.sandbox_plugins,
status_callback=self._status_callback,
headless_mode=False,
env_vars=env_vars,
)
# FIXME: this sleep is a terrible hack.