Fix issue #6223 : [Bug]: fix-me-experimental fails with "no such file or directory pyproject.toml"

feat(eval): reliability improvement for SWE-Bench eval_infer (#6347 )
(feat) Add trajectory replay for headless mode (#6215 )
2026-04-29 03:00:45 -04:00 · 2025-01-19 00:33:49 +00:00 · 2025-01-18 14:02:59 -05:00 · 2025-01-18 05:48:22 +00:00 · 2025-01-17 20:17:18 -07:00 · 2025-01-18 00:16:47 +00:00
28 changed files with 414 additions and 136 deletions
--- a/config.template.toml
+++ b/config.template.toml
@@ -39,6 +39,11 @@ workspace_base = "./workspace"
 # If it's a folder, the session id will be used as the file name
 #save_trajectory_path="./trajectories"

+# Path to replay a trajectory, must be a file path
+# If provided, trajectory will be loaded and replayed before the
+# agent responds to any user instruction
+#replay_trajectory_path = ""
+
 # File store path
 #file_store_path = "/tmp/file_store"

--- a/docs/modules/usage/configuration-options.md
+++ b/docs/modules/usage/configuration-options.md
@@ -55,6 +55,11 @@ The core configuration options are defined in the `[core]` section of the `confi
  - Default: `"./trajectories"`
  - Description: Path to store trajectories (can be a folder or a file). If it's a folder, the trajectories will be saved in a file named with the session id name and .json extension, in that folder.

+- `replay_trajectory_path`
+  - Type: `str`
+  - Default: `""`
+  - Description: Path to a trajectory file to load and replay. If given, it must be the path to a trajectory file in JSON format. The actions in the trajectory file are replayed before any user instruction is executed.
+
 ### File Store
 - `file_store_path`
  - Type: `str`
--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -355,7 +355,9 @@ def _process_instance_wrapper(
            )
            # e is likely an EvalException, so we can't directly infer it from type
            # but rather check if it's a fatal error
-            if is_fatal_runtime_error(str(e)):
+            # But it can also be AgentRuntime**Error (e.g., swe_bench/eval_infer.py)
+            _error_str = type(e).__name__ + ': ' + str(e)
+            if is_fatal_runtime_error(_error_str):
                runtime_failure_count += 1
                msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
                msg += '\n' + '-' * 10 + '\n'
@@ -531,6 +533,7 @@ def is_fatal_runtime_error(error: str | None) -> bool:
        return False

    FATAL_RUNTIME_ERRORS = [
+        AgentRuntimeTimeoutError,
        AgentRuntimeUnavailableError,
        AgentRuntimeDisconnectedError,
        AgentRuntimeNotFoundError,
--- a/frontend/tests/components/browser.test.tsx
+++ b/frontend/tests/components/browser.test.tsx
@@ -37,7 +37,6 @@ describe("Browser", () => {
        browser: {
          url: "https://example.com",
          screenshotSrc: "",
-          updateCount: 0,
        },
      },
    });
@@ -53,7 +52,6 @@ describe("Browser", () => {
          url: "https://example.com",
          screenshotSrc:
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
-          updateCount: 0,
        },
      },
    });
--- a/frontend/src/components/features/chat/chat-message.tsx
+++ b/frontend/src/components/features/chat/chat-message.tsx
@@ -6,24 +6,19 @@ import { cn } from "#/utils/utils";
 import { ul, ol } from "../markdown/list";
 import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
 import { anchor } from "../markdown/anchor";
-import { JumpToFileButton } from "#/components/shared/buttons/jump-to-file-button";
-import { useFiles } from "#/context/files";

 interface ChatMessageProps {
  type: "user" | "assistant";
  message: string;
-  filePath?: string;
 }

 export function ChatMessage({
  type,
  message,
-  filePath,
  children,
 }: React.PropsWithChildren<ChatMessageProps>) {
  const [isHovering, setIsHovering] = React.useState(false);
  const [isCopy, setIsCopy] = React.useState(false);
-  const { setSelectedPath } = useFiles();

  const handleCopyToClipboard = async () => {
    await navigator.clipboard.writeText(message);
@@ -62,12 +57,6 @@ export function ChatMessage({
        onClick={handleCopyToClipboard}
        mode={isCopy ? "copied" : "copy"}
      />
-      {filePath && (
-        <JumpToFileButton
-          filePath={filePath}
-          onClick={() => setSelectedPath(filePath)}
-        />
-      )}
      <Markdown
        className="text-sm overflow-auto"
        components={{
--- a/frontend/src/components/features/chat/messages.tsx
+++ b/frontend/src/components/features/chat/messages.tsx
@@ -29,7 +29,6 @@ export const Messages: React.FC<MessagesProps> = React.memo(
          key={index}
          type={message.sender}
          message={message.content}
-          filePath={message.filePath}
        >
          {message.imageUrls && message.imageUrls.length > 0 && (
            <ImageCarousel size="small" images={message.imageUrls} />
--- a/frontend/src/components/shared/buttons/jump-to-file-button.tsx
+++ b/frontend/src/components/shared/buttons/jump-to-file-button.tsx
@@ -1,32 +0,0 @@
-import React from "react";
-import { useTranslation } from "react-i18next";
-import { VscGoToFile } from "react-icons/vsc";
-import { I18nKey } from "#/i18n/declaration";
-import { ActionTooltip } from "#/components/shared/action-tooltip";
-import { cn } from "#/utils/utils";
-
-interface JumpToFileButtonProps {
-  filePath: string;
-  onClick: () => void;
-}
-
-export function JumpToFileButton({ filePath, onClick }: JumpToFileButtonProps) {
-  const { t } = useTranslation();
-
-  return (
-    <ActionTooltip content={t(I18nKey.CHAT$JUMP_TO_FILE_TOOLTIP, { path: filePath })} side="top">
-      <button
-        type="button"
-        data-testid="jump-to-file-button"
-        onClick={onClick}
-        className={cn(
-          "absolute top-2 right-12 p-2 rounded-lg",
-          "text-neutral-400 hover:text-neutral-200 hover:bg-neutral-700",
-          "transition-colors duration-200"
-        )}
-      >
-        <VscGoToFile size={16} />
-      </button>
-    </ActionTooltip>
-  );
-}
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -4492,21 +4492,7 @@
        "tr": "İstemcinin hazır olması bekleniyor...",
        "ja": "クライアントの準備を待機中"
    },
-    "CHAT$JUMP_TO_FILE_TOOLTIP": {
-    "en": "Jump to file: {{path}}",
-    "zh-CN": "跳转到文件：{{path}}",
-    "de": "Zur Datei springen: {{path}}",
-    "ko-KR": "파일로 이동: {{path}}",
-    "no": "Hopp til fil: {{path}}",
-    "zh-TW": "跳轉到文件：{{path}}",
-    "it": "Vai al file: {{path}}",
-    "pt": "Ir para o arquivo: {{path}}",
-    "es": "Ir al archivo: {{path}}",
-    "ar": "انتقل إلى الملف: {{path}}",
-    "fr": "Aller au fichier: {{path}}",
-    "tr": "Dosyaya git: {{path}}"
-  },
-  "SUGGESTIONS$WHAT_TO_BUILD": {
+    "SUGGESTIONS$WHAT_TO_BUILD": {
        "en": "What do you want to build?",
        "ja": "何を開発しますか？",
        "zh-CN": "你想要构建什么？",
--- a/frontend/src/message.d.ts
+++ b/frontend/src/message.d.ts
@@ -8,5 +8,4 @@ type Message = {
  pending?: boolean;
  translationID?: string;
  eventID?: number;
-  filePath?: string;
 };
--- a/frontend/src/routes/_oh.app/route.tsx
+++ b/frontend/src/routes/_oh.app/route.tsx
@@ -1,7 +1,7 @@
 import { useDisclosure } from "@nextui-org/react";
 import React from "react";
 import { Outlet } from "react-router";
-import { useDispatch, useSelector } from "react-redux";
+import { useDispatch } from "react-redux";
 import { FaServer } from "react-icons/fa";
 import toast from "react-hot-toast";
 import { useTranslation } from "react-i18next";
@@ -11,7 +11,6 @@ import {
  useConversation,
 } from "#/context/conversation-context";
 import { Controls } from "#/components/features/controls/controls";
-import { RootState } from "#/store";
 import { clearMessages } from "#/state/chat-slice";
 import { clearTerminal } from "#/state/command-slice";
 import { useEffectOnce } from "#/hooks/use-effect-once";
@@ -33,7 +32,6 @@ import {
 import Security from "#/components/shared/modals/security/security";
 import { useEndSession } from "#/hooks/use-end-session";
 import { useUserConversation } from "#/hooks/query/use-user-conversation";
-import { CountBadge } from "#/components/layout/count-badge";
 import { ServedAppLabel } from "#/components/layout/served-app-label";
 import { TerminalStatusLabel } from "#/components/features/terminal/terminal-status-label";
 import { useSettings } from "#/hooks/query/use-settings";
@@ -52,7 +50,6 @@ function AppContent() {
  const endSession = useEndSession();

  const [width, setWidth] = React.useState(window.innerWidth);
-  const { updateCount } = useSelector((state: RootState) => state.browser);

  const secrets = React.useMemo(
    () => [gitHubToken].filter((secret) => secret !== null),
@@ -144,7 +141,6 @@ function AppContent() {
                    label: (
                      <div className="flex items-center gap-1">
                        {t(I18nKey.BROWSER$TITLE)}
-                        {updateCount > 0 && <CountBadge count={updateCount} />}
                      </div>
                    ),
                    to: "browser",
--- a/frontend/src/state/browser-slice.ts
+++ b/frontend/src/state/browser-slice.ts
@@ -5,8 +5,6 @@ export const initialState = {
  url: "https://github.com/All-Hands-AI/OpenHands",
  // Base64-encoded screenshot of browser window (placeholder for now, will be replaced with the actual screenshot later)
  screenshotSrc: "",
-  // Counter for browser updates
-  updateCount: 0,
 };

 export const browserSlice = createSlice({
@@ -18,7 +16,6 @@ export const browserSlice = createSlice({
    },
    setScreenshotSrc: (state, action) => {
      state.screenshotSrc = action.payload;
-      state.updateCount += 1;
    },
  },
 });
--- a/frontend/src/state/chat-slice.ts
+++ b/frontend/src/state/chat-slice.ts
@@ -166,9 +166,8 @@ export const chatSlice = createSlice({
        }\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
        causeMessage.content = content; // Observation content includes the action
      } else if (observationID === "read" || observationID === "edit") {
-        const { content, extras } = observation.payload;
+        const { content } = observation.payload;
        causeMessage.content = `\`\`\`${observationID === "edit" ? "diff" : "python"}\n${content}\n\`\`\``; // Content is already truncated by the ACI
-        causeMessage.filePath = extras.path;
      } else if (observationID === "browse") {
        let content = `**URL:** ${observation.payload.extras.url}\n`;
        if (observation.payload.extras.error) {
--- a/microagents/tasks/add_openhands_repo_instruction.md
+++ b/microagents/tasks/add_openhands_repo_instruction.md
@@ -0,0 +1,65 @@
+---
+name: add_openhands_repo_instruction
+type: task
+version: 1.0.0
+author: openhands
+agent: CodeActAgent
+inputs:
+  - name: REPO_FOLDER_NAME
+    description: "Name of the repository folder under /workspace for the agent to work on"
+    required: false
+---
+
+Please browse the current repository under /workspace/{{ REPO_FOLDER_NAME }}, look at the documentation and relevant code, and understand the purpose of this repository.
+
+Specifically, I want you to create a `.openhands/microagents/repo.md`  file. This file should contain succinct information that summarizes (1) the purpose of this repository, (2) the general setup of this repo, and (3) a brief description of the structure of this repo.
+
+Here's an example:
+```markdown
+---
+name: repo
+type: repo
+agent: CodeActAgent
+---
+
+This repository contains the code for runtime-API, an automated AI software engineer. It has a Python backend
+(in the `openhands` directory) and React frontend (in the `frontend` directory).
+
+## General Setup:
+To set up the entire repo, including frontend and backend, run `make build`.
+You don't need to do this unless the user asks you to, or if you're trying to run the entire application.
+
+Before pushing any changes, you should ensure that any lint errors or simple test errors have been fixed.
+
+* If you've made changes to the backend, you should run `pre-commit run --all-files --config ./dev_config/python/.pre-commit-config.yaml`
+* If you've made changes to the frontend, you should run `cd frontend && npm run lint:fix && npm run build ; cd ..`
+
+If either command fails, it may have automatically fixed some issues. You should fix any issues that weren't automatically fixed,
+then re-run the command to ensure it passes.
+
+## Repository Structure
+Backend:
+- Located in the `openhands` directory
+- Testing:
+  - All tests are in `tests/unit/test_*.py`
+  - To test new code, run `poetry run pytest tests/unit/test_xxx.py` where `xxx` is the appropriate file for the current functionality
+  - Write all tests with pytest
+
+Frontend:
+- Located in the `frontend` directory
+- Prerequisites: A recent version of NodeJS / NPM
+- Setup: Run `npm install` in the frontend directory
+- Testing:
+  - Run tests: `npm run test`
+  - To run specific tests: `npm run test -- -t "TestName"`
+- Building:
+  - Build for production: `npm run build`
+- Environment Variables:
+  - Set in `frontend/.env` or as environment variables
+  - Available variables: VITE_BACKEND_HOST, VITE_USE_TLS, VITE_INSECURE_SKIP_VERIFY, VITE_FRONTEND_PORT
+- Internationalization:
+  - Generate i18n declaration file: `npm run make-i18n`
+```
+
+Now, please write a similar markdown for the current repository.
+Read all the GitHub workflows under .github/ of the repository (if this folder exists) to understand the CI checks (e.g., linter, pre-commit), and include those in the repo.md file.
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -12,6 +12,7 @@ from litellm.exceptions import (
 )

 from openhands.controller.agent import Agent
+from openhands.controller.replay import ReplayManager
 from openhands.controller.state.state import State, TrafficControlState
 from openhands.controller.stuck import StuckDetector
 from openhands.core.config import AgentConfig, LLMConfig
@@ -90,6 +91,7 @@ class AgentController:
        is_delegate: bool = False,
        headless_mode: bool = True,
        status_callback: Callable | None = None,
+        replay_events: list[Event] | None = None,
    ):
        """Initializes a new instance of the AgentController class.

@@ -108,6 +110,7 @@ class AgentController:
            is_delegate: Whether this controller is a delegate.
            headless_mode: Whether the agent is run in headless mode.
            status_callback: Optional callback function to handle status updates.
+            replay_events: A list of events to replay.
        """
        self.id = sid
        self.agent = agent
@@ -139,6 +142,9 @@ class AgentController:
        self._stuck_detector = StuckDetector(self.state)
        self.status_callback = status_callback

+        # replay-related
+        self._replay_manager = ReplayManager(replay_events)
+
    async def close(self) -> None:
        """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream.

@@ -234,6 +240,11 @@ class AgentController:
            await self._react_to_exception(reported)

    def should_step(self, event: Event) -> bool:
+        """
+        Whether the agent should take a step based on an event. In general,
+        the agent should take a step if it receives a message from the user,
+        or observes something in the environment (after acting).
+        """
        # it might be the delegate's day in the sun
        if self.delegate is not None:
            return False
@@ -641,42 +652,50 @@ class AgentController:

        self.update_state_before_step()
        action: Action = NullAction()
-        try:
-            action = self.agent.step(self.state)
-            if action is None:
-                raise LLMNoActionError('No action was returned')
-        except (
-            LLMMalformedActionError,
-            LLMNoActionError,
-            LLMResponseError,
-            FunctionCallValidationError,
-            FunctionCallNotExistsError,
-        ) as e:
-            self.event_stream.add_event(
-                ErrorObservation(
-                    content=str(e),
-                ),
-                EventSource.AGENT,
-            )
-            return
-        except (ContextWindowExceededError, BadRequestError) as e:
-            # FIXME: this is a hack until a litellm fix is confirmed
-            # Check if this is a nested context window error
-            error_str = str(e).lower()
-            if (
-                'contextwindowexceedederror' in error_str
-                or 'prompt is too long' in error_str
-                or isinstance(e, ContextWindowExceededError)
-            ):
-                # When context window is exceeded, keep roughly half of agent interactions
-                self.state.history = self._apply_conversation_window(self.state.history)

-                # Save the ID of the first event in our truncated history for future reloading
-                if self.state.history:
-                    self.state.start_id = self.state.history[0].id
-                # Don't add error event - let the agent retry with reduced context
+        if self._replay_manager.should_replay():
+            # in replay mode, we don't let the agent proceed
+            # instead, we replay the action from the replay trajectory
+            action = self._replay_manager.step()
+        else:
+            try:
+                action = self.agent.step(self.state)
+                if action is None:
+                    raise LLMNoActionError('No action was returned')
+            except (
+                LLMMalformedActionError,
+                LLMNoActionError,
+                LLMResponseError,
+                FunctionCallValidationError,
+                FunctionCallNotExistsError,
+            ) as e:
+                self.event_stream.add_event(
+                    ErrorObservation(
+                        content=str(e),
+                    ),
+                    EventSource.AGENT,
+                )
                return
-            raise
+            except (ContextWindowExceededError, BadRequestError) as e:
+                # FIXME: this is a hack until a litellm fix is confirmed
+                # Check if this is a nested context window error
+                error_str = str(e).lower()
+                if (
+                    'contextwindowexceedederror' in error_str
+                    or 'prompt is too long' in error_str
+                    or isinstance(e, ContextWindowExceededError)
+                ):
+                    # When context window is exceeded, keep roughly half of agent interactions
+                    self.state.history = self._apply_conversation_window(
+                        self.state.history
+                    )
+
+                    # Save the ID of the first event in our truncated history for future reloading
+                    if self.state.history:
+                        self.state.start_id = self.state.history[0].id
+                    # Don't add error event - let the agent retry with reduced context
+                    return
+                raise

        if action.runnable:
            if self.state.confirmation_mode and (
--- a/openhands/controller/replay.py
+++ b/openhands/controller/replay.py
@@ -0,0 +1,52 @@
+from openhands.core.logger import openhands_logger as logger
+from openhands.events.action.action import Action
+from openhands.events.event import Event, EventSource
+
+
+class ReplayManager:
+    """ReplayManager manages the lifecycle of a replay session of a given trajectory.
+
+    Replay manager keeps track of a list of events, replays actions, and ignores
+    messages and observations. It could lead to unexpected or even erroneous
+    results if any action is non-deterministic, or if the initial state before
+    the replay session is different from the initial state of the trajectory.
+    """
+
+    def __init__(self, replay_events: list[Event] | None):
+        if replay_events:
+            logger.info(f'Replay logs loaded, events length = {len(replay_events)}')
+        self.replay_events = replay_events
+        self.replay_mode = bool(replay_events)
+        self.replay_index = 0
+
+    def _replayable(self) -> bool:
+        return (
+            self.replay_events is not None
+            and self.replay_index < len(self.replay_events)
+            and isinstance(self.replay_events[self.replay_index], Action)
+            and self.replay_events[self.replay_index].source != EventSource.USER
+        )
+
+    def should_replay(self) -> bool:
+        """
+        Whether the controller is in trajectory replay mode, and the replay
+        hasn't finished. Note: after the replay is finished, the user and
+        the agent could continue to message/act.
+
+        This method also moves "replay_index" to the next action, if applicable.
+        """
+        if not self.replay_mode:
+            return False
+
+        assert self.replay_events is not None
+        while self.replay_index < len(self.replay_events) and not self._replayable():
+            self.replay_index += 1
+
+        return self._replayable()
+
+    def step(self) -> Action:
+        assert self.replay_events is not None
+        event = self.replay_events[self.replay_index]
+        assert isinstance(event, Action)
+        self.replay_index += 1
+        return event
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -28,6 +28,7 @@ class AppConfig(BaseModel):
        file_store: Type of file store to use.
        file_store_path: Path to the file store.
        save_trajectory_path: Either a folder path to store trajectories with auto-generated filenames, or a designated trajectory file path.
+        replay_trajectory_path: Path to a trajectory file to load and replay. If provided, the trajectory is replayed before the user's instruction.
        workspace_base: Base path for the workspace. Defaults to `./workspace` as absolute path.
        workspace_mount_path: Path to mount the workspace. Defaults to `workspace_base`.
        workspace_mount_path_in_sandbox: Path to mount the workspace in sandbox. Defaults to `/workspace`.
@@ -55,6 +56,7 @@ class AppConfig(BaseModel):
    file_store: str = Field(default='local')
    file_store_path: str = Field(default='/tmp/openhands_file_store')
    save_trajectory_path: str | None = Field(default=None)
+    replay_trajectory_path: str | None = Field(default=None)
    workspace_base: str | None = Field(default=None)
    workspace_mount_path: str | None = Field(default=None)
    workspace_mount_path_in_sandbox: str = Field(default='/workspace')
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -2,6 +2,7 @@ import asyncio
 import json
 import os
 import sys
+from pathlib import Path
 from typing import Callable, Protocol

 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
@@ -22,10 +23,11 @@ from openhands.core.setup import (
    generate_sid,
 )
 from openhands.events import EventSource, EventStreamSubscriber
-from openhands.events.action import MessageAction
+from openhands.events.action import MessageAction, NullAction
 from openhands.events.action.action import Action
 from openhands.events.event import Event
 from openhands.events.observation import AgentStateChangedObservation
+from openhands.events.serialization import event_from_dict
 from openhands.events.serialization.event import event_to_trajectory
 from openhands.runtime.base import Runtime

@@ -101,7 +103,17 @@ async def run_controller(
    if agent is None:
        agent = create_agent(runtime, config)

-    controller, initial_state = create_controller(agent, runtime, config)
+    replay_events: list[Event] | None = None
+    if config.replay_trajectory_path:
+        logger.info('Trajectory replay is enabled')
+        assert isinstance(initial_user_action, NullAction)
+        replay_events, initial_user_action = load_replay_log(
+            config.replay_trajectory_path
+        )
+
+    controller, initial_state = create_controller(
+        agent, runtime, config, replay_events=replay_events
+    )

    assert isinstance(
        initial_user_action, Action
@@ -194,21 +206,64 @@ def auto_continue_response(
    return message


+def load_replay_log(trajectory_path: str) -> tuple[list[Event] | None, Action]:
+    """
+    Load trajectory from given path, deserialize it to a list of events, and return
+    two things:
+    1) A list of events except the first action
+    2) First action (user message, a.k.a. initial task)
+    """
+    try:
+        path = Path(trajectory_path).resolve()
+
+        if not path.exists():
+            raise ValueError(f'Trajectory file not found: {path}')
+
+        if not path.is_file():
+            raise ValueError(f'Trajectory path is a directory, not a file: {path}')
+
+        with open(path, 'r', encoding='utf-8') as file:
+            data = json.load(file)
+            if not isinstance(data, list):
+                raise ValueError(
+                    f'Expected a list in {path}, got {type(data).__name__}'
+                )
+            events = []
+            for item in data:
+                event = event_from_dict(item)
+                # cannot add an event with _id to event stream
+                event._id = None  # type: ignore[attr-defined]
+                events.append(event)
+            assert isinstance(events[0], MessageAction)
+            return events[1:], events[0]
+    except json.JSONDecodeError as e:
+        raise ValueError(f'Invalid JSON format in {trajectory_path}: {e}')
+
+
 if __name__ == '__main__':
    args = parse_arguments()

+    config = setup_config_from_args(args)
+
    # Determine the task
+    task_str = ''
    if args.file:
        task_str = read_task_from_file(args.file)
    elif args.task:
        task_str = args.task
    elif not sys.stdin.isatty():
        task_str = read_task_from_stdin()
+
+    initial_user_action: Action = NullAction()
+    if config.replay_trajectory_path:
+        if task_str:
+            raise ValueError(
+                'User-specified task is not supported under trajectory replay mode'
+            )
+    elif task_str:
+        initial_user_action = MessageAction(content=task_str)
    else:
        raise ValueError('No task provided. Please specify a task through -t, -f.')
-    initial_user_action: MessageAction = MessageAction(content=task_str)
-
-    config = setup_config_from_args(args)

    # Set session name
    session_name = args.name
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -11,6 +11,7 @@ from openhands.core.config import (
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
+from openhands.events.event import Event
 from openhands.llm.llm import LLM
 from openhands.runtime import get_runtime_cls
 from openhands.runtime.base import Runtime
@@ -78,7 +79,11 @@ def create_agent(runtime: Runtime, config: AppConfig) -> Agent:


 def create_controller(
-    agent: Agent, runtime: Runtime, config: AppConfig, headless_mode: bool = True
+    agent: Agent,
+    runtime: Runtime,
+    config: AppConfig,
+    headless_mode: bool = True,
+    replay_events: list[Event] | None = None,
 ) -> Tuple[AgentController, State | None]:
    event_stream = runtime.event_stream
    initial_state = None
@@ -101,6 +106,7 @@ def create_controller(
        initial_state=initial_state,
        headless_mode=headless_mode,
        confirmation_mode=config.security.confirmation_mode,
+        replay_events=replay_events,
    )
    return (controller, initial_state)

--- a/openhands/events/event.py
+++ b/openhands/events/event.py
@@ -24,6 +24,8 @@ class FileReadSource(str, Enum):

@dataclass
 class Event:
+    INVALID_ID = -1
+
    @property
    def message(self) -> str | None:
        if hasattr(self, '_message'):
@@ -34,7 +36,7 @@ class Event:
    def id(self) -> int:
        if hasattr(self, '_id'):
            return self._id  # type: ignore[attr-defined]
-        return -1
+        return Event.INVALID_ID

    @property
    def timestamp(self):
--- a/openhands/events/observation/browse.py
+++ b/openhands/events/observation/browse.py
@@ -12,7 +12,7 @@ class BrowserOutputObservation(Observation):

    url: str
    trigger_by_action: str
-    screenshot: str = field(repr=False)  # don't show in repr
+    screenshot: str = field(repr=False, default='')  # don't show in repr
    error: bool = False
    observation: str = ObservationType.BROWSE
    # do not include in the memory
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -230,7 +230,7 @@ class RemoteRuntime(ActionExecutionClient):
                f'Runtime started. URL: {self.runtime_url}',
            )
        except requests.HTTPError as e:
-            self.log('error', f'Unable to start runtime: {e}')
+            self.log('error', f'Unable to start runtime: {str(e)}')
            raise AgentRuntimeUnavailableError() from e

    def _resume_runtime(self):
@@ -315,10 +315,11 @@ class RemoteRuntime(ActionExecutionClient):
                self.check_if_alive()
            except requests.HTTPError as e:
                self.log(
-                    'warning', f"Runtime /alive failed, but pod says it's ready: {e}"
+                    'warning',
+                    f"Runtime /alive failed, but pod says it's ready: {str(e)}",
                )
                raise AgentRuntimeNotReadyError(
-                    f'Runtime /alive failed to respond with 200: {e}'
+                    f'Runtime /alive failed to respond with 200: {str(e)}'
                )
            return
        elif (
@@ -363,6 +364,7 @@ class RemoteRuntime(ActionExecutionClient):
                ):
                    self.log('debug', 'Runtime stopped.')
        except Exception as e:
+            self.log('error', f'Unable to stop runtime: {str(e)}')
            raise e
        finally:
            super().close()
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@@ -303,12 +303,17 @@ def truncate_hash(hash: str) -> str:

 def get_hash_for_lock_files(base_image: str):
    openhands_source_dir = Path(openhands.__file__).parent
+    project_root = openhands_source_dir.parent
    md5 = hashlib.md5()
    md5.update(base_image.encode())
    for file in ['pyproject.toml', 'poetry.lock']:
-        src = Path(openhands_source_dir, file)
+        # First try project root
+        src = Path(project_root, file)
        if not src.exists():
-            src = Path(openhands_source_dir.parent, file)
+            # Then try source dir
+            src = Path(openhands_source_dir, file)
+            if not src.exists():
+                raise FileNotFoundError(f"Could not find {file} in either {project_root} or {openhands_source_dir}")
        with open(src, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                md5.update(chunk)
--- a/openhands/server/routes/manage_conversations.py
+++ b/openhands/server/routes/manage_conversations.py
@@ -51,7 +51,10 @@ async def _create_new_conversation(
        session_init_args = {**settings.__dict__, **session_init_args}
        # We could use litellm.check_valid_key for a more accurate check,
        # but that would run a tiny inference.
-        if not settings.llm_api_key or settings.llm_api_key.isspace():
+        if (
+            not settings.llm_api_key
+            or settings.llm_api_key.get_secret_value().isspace()
+        ):
            logger.warn(f'Missing api key for model {settings.llm_model}')
            raise LLMAuthenticationError(
                'Error authenticating with the LLM provider. Please check your API key'
--- a/openhands/server/settings.py
+++ b/openhands/server/settings.py
@@ -1,6 +1,11 @@
+from __future__ import annotations
+
 from pydantic import BaseModel, SecretStr, SerializationInfo, field_serializer
 from pydantic.json import pydantic_encoder

+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.utils import load_app_config
+

 class Settings(BaseModel):
    """
@@ -21,12 +26,31 @@ class Settings(BaseModel):
    def llm_api_key_serializer(self, llm_api_key: SecretStr, info: SerializationInfo):
        """Custom serializer for the LLM API key.

-        To serialize the API key instead of `"********"`, set `expose_secrets` to True in the serialization context. For example::
-
-        settings.model_dump_json(context={'expose_secrets': True})
+        To serialize the API key instead of ********, set expose_secrets to True in the serialization context.
        """
        context = info.context
        if context and context.get('expose_secrets', False):
            return llm_api_key.get_secret_value()

        return pydantic_encoder(llm_api_key)
+
+    @staticmethod
+    def from_config() -> Settings | None:
+        app_config = load_app_config()
+        llm_config: LLMConfig = app_config.get_llm_config()
+        if llm_config.api_key is None:
+            # If no api key has been set, we take this to mean that there is no reasonable default
+            return None
+        security = app_config.security
+        settings = Settings(
+            language='en',
+            agent=app_config.default_agent,
+            max_iterations=app_config.max_iterations,
+            security_analyzer=security.security_analyzer,
+            confirmation_mode=security.confirmation_mode,
+            llm_model=llm_config.model,
+            llm_api_key=llm_config.api_key,
+            llm_base_url=llm_config.base_url,
+            remote_runtime_resource_factor=app_config.sandbox.remote_runtime_resource_factor,
+        )
+        return settings
--- a/openhands/storage/settings/file_settings_store.py
+++ b/openhands/storage/settings/file_settings_store.py
@@ -23,7 +23,7 @@ class FileSettingsStore(SettingsStore):
            settings = Settings(**kwargs)
            return settings
        except FileNotFoundError:
-            return None
+            return Settings.from_config()

    async def store(self, settings: Settings):
        json_str = settings.model_dump_json(context={'expose_secrets': True})
--- a/tests/unit/test_file_settings_store.py
+++ b/tests/unit/test_file_settings_store.py
@@ -20,8 +20,12 @@ def file_settings_store(mock_file_store):

 @pytest.mark.asyncio
 async def test_load_nonexistent_data(file_settings_store):
-    file_settings_store.file_store.read.side_effect = FileNotFoundError()
-    assert await file_settings_store.load() is None
+    with patch(
+        'openhands.server.settings.load_app_config',
+        MagicMock(return_value=AppConfig()),
+    ):
+        file_settings_store.file_store.read.side_effect = FileNotFoundError()
+        assert await file_settings_store.load() is None


 @pytest.mark.asyncio
--- a/tests/unit/test_runtime_build.py
+++ b/tests/unit/test_runtime_build.py
@@ -99,15 +99,43 @@ def test_prep_build_folder(temp_dir):


 def test_get_hash_for_lock_files():
-    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
-        hash = get_hash_for_lock_files('some_base_image')
-        # Since we mocked open to always return "mock_data", the hash is the result
-        # of hashing the name of the base image followed by "mock-data" twice
-        md5 = hashlib.md5()
-        md5.update('some_base_image'.encode())
-        for _ in range(2):
-            md5.update('mock-data'.encode())
-        assert hash == truncate_hash(md5.hexdigest())
+    # Create a temporary directory structure that mimics the package structure
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create project root with pyproject.toml and poetry.lock
+        project_root = Path(temp_dir)
+        openhands_dir = project_root / "openhands"
+        os.makedirs(openhands_dir)
+
+        # Create dummy files
+        with open(project_root / "pyproject.toml", "w") as f:
+            f.write("test_content")
+        with open(project_root / "poetry.lock", "w") as f:
+            f.write("test_content")
+
+        # Create a dummy __file__ attribute
+        dummy_file = openhands_dir / "__init__.py"
+        with open(dummy_file, "w") as f:
+            f.write("")
+
+        # Patch openhands.__file__ to point to our test directory
+        original_file = openhands.__file__
+        openhands.__file__ = str(dummy_file)
+
+        try:
+            # Test that it can find files in project root
+            hash1 = get_hash_for_lock_files("test_image")
+            assert isinstance(hash1, str)
+            assert len(hash1) > 0
+
+            # Test that it raises FileNotFoundError when files don't exist
+            os.remove(project_root / "pyproject.toml")
+            with pytest.raises(FileNotFoundError) as exc_info:
+                get_hash_for_lock_files("test_image")
+            assert "Could not find pyproject.toml" in str(exc_info.value)
+
+        finally:
+            # Restore original __file__
+            openhands.__file__ = original_file


 def test_get_hash_for_source_files():
--- a/tests/unit/test_settings.py
+++ b/tests/unit/test_settings.py
@@ -0,0 +1,67 @@
+from unittest.mock import patch
+
+from pydantic import SecretStr
+
+from openhands.core.config.app_config import AppConfig
+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.config.sandbox_config import SandboxConfig
+from openhands.core.config.security_config import SecurityConfig
+from openhands.server.settings import Settings
+
+
+def test_settings_from_config():
+    # Mock configuration
+    mock_app_config = AppConfig(
+        default_agent='test-agent',
+        max_iterations=100,
+        security=SecurityConfig(
+            security_analyzer='test-analyzer', confirmation_mode=True
+        ),
+        llms={
+            'llm': LLMConfig(
+                model='test-model',
+                api_key=SecretStr('test-key'),
+                base_url='https://test.example.com',
+            )
+        },
+        sandbox=SandboxConfig(remote_runtime_resource_factor=2),
+    )
+
+    with patch(
+        'openhands.server.settings.load_app_config', return_value=mock_app_config
+    ):
+        settings = Settings.from_config()
+
+        assert settings is not None
+        assert settings.language == 'en'
+        assert settings.agent == 'test-agent'
+        assert settings.max_iterations == 100
+        assert settings.security_analyzer == 'test-analyzer'
+        assert settings.confirmation_mode is True
+        assert settings.llm_model == 'test-model'
+        assert settings.llm_api_key.get_secret_value() == 'test-key'
+        assert settings.llm_base_url == 'https://test.example.com'
+        assert settings.remote_runtime_resource_factor == 2
+
+
+def test_settings_from_config_no_api_key():
+    # Mock configuration without API key
+    mock_app_config = AppConfig(
+        default_agent='test-agent',
+        max_iterations=100,
+        security=SecurityConfig(
+            security_analyzer='test-analyzer', confirmation_mode=True
+        ),
+        llms={
+            'llm': LLMConfig(
+                model='test-model', api_key=None, base_url='https://test.example.com'
+            )
+        },
+        sandbox=SandboxConfig(remote_runtime_resource_factor=2),
+    )
+
+    with patch(
+        'openhands.server.settings.load_app_config', return_value=mock_app_config
+    ):
+        settings = Settings.from_config()
+        assert settings is None
Author	SHA1	Message	Date
openhands	d56befb58a	Fix issue #6223 : [Bug]: fix-me-experimental fails with "no such file or directory pyproject.toml"	2025-01-19 00:33:49 +00:00
Xingyao Wang	2b04ee2e62	feat(eval): reliability improvement for SWE-Bench eval_infer (#6347 )	2025-01-18 14:02:59 -05:00
Boxuan Li	4383be1ab4	(feat) Add trajectory replay for headless mode (#6215 )	2025-01-18 05:48:22 +00:00
tofarr	b4d20e3e18	Feat: settings default (#6328 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-17 20:17:18 -07:00
mamoodi	532c7cdf02	Attempt to fix doc deploy (#6337 )	2025-01-18 00:16:47 +00:00
mamoodi	987861b5e7	Remove broken browser counter logic (#6334 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-01-17 22:41:31 +00:00
Calvin Smith	f07ec7a09c	fix: Conversation creation accessing secret without unwrapping (#6335 ) Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2025-01-17 22:16:57 +00:00
Xingyao Wang	b1fa6301f0	feat: add prompt for generating repo.md for an arbitrary repo (#6034 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2025-01-17 21:47:27 +00:00