Fix lint errors

Fix merge conflicts with main branch
chore(deps): bump the mcp-packages group with 2 updates (#8546 )
2026-04-29 03:00:45 -04:00 · 2025-05-19 16:55:19 +00:00 · 2025-05-19 16:53:24 +00:00 · 2025-05-19 18:37:11 +02:00 · 2025-05-19 09:59:22 -06:00 · 2025-05-19 15:49:53 +00:00
150 changed files with 7495 additions and 5119 deletions
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -13,6 +13,10 @@ updates:
      browsergym:
        patterns:
          - "browsergym*"
+      mcp-packages:
+        patterns:
+          - "mcp"
+          - "mcpm"
      security-all:
        applies-to: "security-updates"
        patterns:
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -30,11 +30,12 @@ jobs:
        run: |
          cd frontend
          npm install --frozen-lockfile
-      - name: Lint and TypeScript compilation
+      - name: Lint, TypeScript compilation, and translation checks
        run: |
          cd frontend
          npm run lint
          npm run make-i18n && tsc
+          npm run check-translation-completeness

  # Run lint on the python code
  lint-python:
--- a/.openhands/microagents/documentation.md
+++ b/.openhands/microagents/documentation.md
@@ -0,0 +1,33 @@
+---
+name: documentation
+type: knowledge
+version: 1.0.0
+agent: CodeActAgent
+triggers:
+- documentation
+- docs
+- document
+---
+
+# Documentation Guidelines
+
+All documentation must be grounded in fact, so you must not make anything up without proper evidence. When you have finished writing documentation, convey to the user what reference source, including web pages, source code, or other sources of documentation you referenced when writing each new fact in the documentation. If you cannot reference a source for anything do not include it in the pull request.
+
+## Best Practices for Documentation
+
+1. **Be Factual**: Only include information that can be verified from reliable sources.
+2. **Cite Sources**: Always reference the source of information (code, web pages, official documentation).
+3. **Be Clear and Concise**: Use simple language and avoid unnecessary jargon.
+4. **Use Examples**: Include practical examples to illustrate concepts.
+5. **Structure Properly**: Use headings, lists, and code blocks to organize information.
+6. **Keep Updated**: Ensure documentation reflects the current state of the code or system.
+
+## Documentation Process
+
+1. Research and gather information from reliable sources
+2. Draft documentation based on verified facts
+3. Review for accuracy and completeness
+4. Include references for all factual statements
+5. Submit only when all information is properly sourced
+
+Remember: If you cannot verify a piece of information, it's better to exclude it than to include potentially incorrect information.
--- a/30
+++ b/30
@@ -5,6 +5,7 @@ SHELL=/usr/bin/env bash
 BACKEND_HOST ?= "127.0.0.1"
 BACKEND_PORT = 3000
 BACKEND_HOST_PORT = "$(BACKEND_HOST):$(BACKEND_PORT)"
+FRONTEND_HOST ?= "127.0.0.1"
 FRONTEND_PORT = 3001
 DEFAULT_WORKSPACE_DIR = "./workspace"
 DEFAULT_MODEL = "gpt-4o"
@@ -154,20 +155,21 @@ install-python-dependencies:
 	fi
 	@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
 		if [ -f "/etc/manjaro-release" ]; then \
-			echo "$(BLUE)Detected Manjaro Linux. Installing Patchright dependencies...$(RESET)"; \
-			poetry run patchright install chromium; \
+			echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
+			poetry run pip install playwright; \
+			poetry run playwright install chromium; \
 		else \
-			if [ ! -f cache/patchright_chromium_is_installed.txt ]; then \
-				echo "Installing patchright chromium..."; \
-				poetry run patchright install chromium; \
+			if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
+				echo "Running playwright install --with-deps chromium..."; \
+				poetry run playwright install --with-deps chromium; \
 				mkdir -p cache; \
-				touch cache/patchright_chromium_is_installed.txt; \
+				touch cache/playwright_chromium_is_installed.txt; \
 			else \
-				echo "Setup already done. Skipping patchright installation."; \
+				echo "Setup already done. Skipping playwright installation."; \
 			fi \
 		fi \
 	else \
-		echo "Skipping Patchright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
+		echo "Skipping Playwright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
 	fi
 	@echo "$(GREEN)Python dependencies installed successfully.$(RESET)"

@@ -287,6 +289,15 @@ setup-config-prompts:
 	@read -p "Enter your LLM base URL [mostly used for local LLMs, leave blank if not needed - example: http://localhost:5001/v1/]: " llm_base_url; \
 	 if [[ ! -z "$$llm_base_url" ]]; then echo "base_url=\"$$llm_base_url\"" >> $(CONFIG_FILE).tmp; fi

+setup-config-basic:
+	@printf '%s\n' \
+	'[core]' \
+	'workspace_base="./workspace"' \
+	> config.toml
+	@echo "$(GREEN)config.toml created.$(RESET)"
+
+openhands-cloud-run:
+	@$(MAKE) run BACKEND_HOST="0.0.0.0" BACKEND_PORT="12000" FRONTEND_HOST="0.0.0.0" FRONTEND_PORT="12001"

 # Develop in container
 docker-dev:
@@ -321,5 +332,4 @@ help:
 	@echo "  $(GREEN)help$(RESET)                - Display this help message, providing information on available targets."

 # Phony targets
-.PHONY: build check-dependencies check-python check-npm check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint start-backend start-frontend run run-wsl setup-config setup-config-prompts help
-.PHONY: docker-dev docker-run
+.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help
--- a/docs/modules/usage/how-to/websocket-connection.md
+++ b/docs/modules/usage/how-to/websocket-connection.md
@@ -0,0 +1,181 @@
+---
+sidebar_position: 9
+---
+
+# Connecting to the WebSocket
+
+This guide explains how to connect to the OpenHands WebSocket API to receive real-time events and send actions to the agent.
+
+## Overview
+
+OpenHands uses [Socket.IO](https://socket.io/) for WebSocket communication between the client and server. The WebSocket connection allows you to:
+
+1. Receive real-time events from the agent
+2. Send user actions to the agent
+3. Maintain a persistent connection for ongoing conversations
+
+## Connecting to the WebSocket
+
+### Connection Parameters
+
+When connecting to the WebSocket, you need to provide the following query parameters:
+
+- `conversation_id`: The ID of the conversation you want to join
+- `latest_event_id`: The ID of the latest event you've received (use `-1` for a new connection)
+- `providers_set`: (Optional) A comma-separated list of provider types
+
+### Connection Example
+
+Here's a basic example of connecting to the WebSocket using JavaScript:
+
+```javascript
+import { io } from "socket.io-client";
+
+const socket = io("http://localhost:3000", {
+  transports: ["websocket"],
+  query: {
+    conversation_id: "your-conversation-id",
+    latest_event_id: -1,
+    providers_set: "github,gitlab" // Optional
+  }
+});
+
+socket.on("connect", () => {
+  console.log("Connected to OpenHands WebSocket");
+});
+
+socket.on("oh_event", (event) => {
+  console.log("Received event:", event);
+});
+
+socket.on("connect_error", (error) => {
+  console.error("Connection error:", error);
+});
+
+socket.on("disconnect", (reason) => {
+  console.log("Disconnected:", reason);
+});
+```
+
+## Sending Actions to the Agent
+
+To send an action to the agent, use the `oh_user_action` event:
+
+```javascript
+// Send a user message to the agent
+socket.emit("oh_user_action", {
+  type: "message",
+  source: "user",
+  message: "Hello, can you help me with my project?"
+});
+```
+
+## Receiving Events from the Agent
+
+The server emits events using the `oh_event` event type. Here are some common event types you might receive:
+
+- User messages (`source: "user", type: "message"`)
+- Agent messages (`source: "agent", type: "message"`)
+- File edits (`action: "edit"`)
+- File writes (`action: "write"`)
+- Command executions (`action: "run"`)
+
+Example event handler:
+
+```javascript
+socket.on("oh_event", (event) => {
+  if (event.source === "agent" && event.type === "message") {
+    console.log("Agent says:", event.message);
+  } else if (event.action === "run") {
+    console.log("Command executed:", event.args.command);
+    console.log("Result:", event.result);
+  }
+});
+```
+
+## Using Websocat for Testing
+
+[Websocat](https://github.com/vi/websocat) is a command-line tool for interacting with WebSockets. It's useful for testing your WebSocket connection without writing a full client application.
+
+### Installation
+
+```bash
+# On macOS
+brew install websocat
+
+# On Linux
+curl -L https://github.com/vi/websocat/releases/download/v1.11.0/websocat.x86_64-unknown-linux-musl > websocat
+chmod +x websocat
+sudo mv websocat /usr/local/bin/
+```
+
+### Connecting to the WebSocket
+
+```bash
+# Connect to the WebSocket and print all received messages
+echo "40{}" | \
+websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
+```
+
+### Sending a Message
+
+```bash
+# Send a message to the agent
+echo '42["oh_user_action",{"type":"message","source":"user","message":"Hello, agent!"}]' | \
+websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
+```
+
+### Complete Example with Websocat
+
+Here's a complete example of connecting to the WebSocket, sending a message, and receiving events:
+
+```bash
+# Start a persistent connection
+websocat -v "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
+
+# In another terminal, send a message
+echo '42["oh_user_action",{"type":"message","source":"user","message":"Can you help me with my project?"}]' | \
+websocat "ws://localhost:3000/socket.io/?EIO=4&transport=websocket&conversation_id=your-conversation-id&latest_event_id=-1"
+```
+
+## Event Structure
+
+Events sent and received through the WebSocket follow a specific structure:
+
+```typescript
+interface OpenHandsEvent {
+  id: string;           // Unique event ID
+  source: string;       // "user" or "agent"
+  timestamp: string;    // ISO timestamp
+  message?: string;     // For message events
+  type?: string;        // Event type (e.g., "message")
+  action?: string;      // Action type (e.g., "run", "edit", "write")
+  args?: any;           // Action arguments
+  result?: any;         // Action result
+}
+```
+
+## Best Practices
+
+1. **Handle Reconnection**: Implement reconnection logic in your client to handle network interruptions.
+2. **Track Event IDs**: Store the latest event ID you've received and use it when reconnecting to avoid duplicate events.
+3. **Error Handling**: Implement proper error handling for connection errors and failed actions.
+4. **Rate Limiting**: Avoid sending too many actions in a short period to prevent overloading the server.
+
+## Troubleshooting
+
+### Connection Issues
+
+- Verify that the OpenHands server is running and accessible
+- Check that you're providing the correct conversation ID
+- Ensure your WebSocket URL is correctly formatted
+
+### Authentication Issues
+
+- Make sure you have the necessary authentication cookies if required
+- Verify that you have permission to access the specified conversation
+
+### Event Handling Issues
+
+- Check that you're correctly parsing the event data
+- Verify that your event handlers are properly registered
--- a/docs/modules/usage/prompting/microagents-org.md
+++ b/docs/modules/usage/prompting/microagents-org.md
@@ -0,0 +1,23 @@
+# Organization and User Microagents
+
+## Purpose
+
+Organizations and users can define microagents that apply to all repositories belonging to the organization or user.
+
+## Usage
+
+These microagents can be [any type of microagent](./microagents-overview#microagent-types) and will be loaded 
+accordingly. However, they are applied to all repositories belonging to the organization or user.
+
+Add a `.openhands` repository under the organization or user and create a `microagents` directory and place the
+microagents in that directory.
+
+## Example
+
+General microagent file example for organization `Great-Co` located inside the `.openhands` repository:
+`microagents/org-microagent.md`:
+```
+* Use type hints and error boundaries; validate inputs at system boundaries and fail with meaningful error messages.
+* Document interfaces and public APIs; use implementation comments only for non-obvious logic.
+* Follow the same naming convention for variables, classes, constants, etc. already used in each repository.
+```
--- a/docs/modules/usage/prompting/microagents-overview.md
+++ b/docs/modules/usage/prompting/microagents-overview.md
@@ -7,7 +7,7 @@ They provide expert guidance, automate common tasks, and ensure consistent pract

 Currently OpenHands supports the following types of microagents:

- [General Repository Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
+- [General Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
 - [Keyword-Triggered Microagents](./microagents-keyword): Guidelines activated by specific keywords in prompts.

 To customize OpenHands' behavior, create a .openhands/microagents/ directory in the root of your repository and
@@ -24,7 +24,7 @@ Example repository structure:
 some-repository/
 └── .openhands/
    └── microagents/
-        └── repo.md            # General repository guidelines
+        └── repo.md            # General guidelines
        └── trigger_this.md    # Microagent triggered by specific keywords
        └── trigger_that.md    # Microagent triggered by specific keywords
 ```
@@ -34,7 +34,7 @@ some-repository/
 Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
 is required:

-| Microagent Type                  | Required |
-|----------------------------------|----------|
-| `General Repository Microagents` | No       |
-| `Keyword-Triggered Microagents`  | Yes      |
+| Microagent Type                 | Required |
+|---------------------------------|----------|
+| `General Microagents`           | No       |
+| `Keyword-Triggered Microagents` | Yes      |
--- a/docs/modules/usage/prompting/microagents-repo.md
+++ b/docs/modules/usage/prompting/microagents-repo.md
@@ -1,4 +1,4 @@
-# General Repository Microagents
+# General Microagents

 ## Purpose

@@ -20,7 +20,7 @@ Frontmatter should be enclosed in triple dashes (---) and may include the follow

 ## Example

-General repository microagent file example located at `.openhands/microagents/repo.md`:
+General microagent file example located at `.openhands/microagents/repo.md`:
 ```
 This project is a TODO application that allows users to track TODO items.

@@ -28,4 +28,4 @@ To set it up, you can run `npm run build`.
 Always make sure the tests are passing before committing changes. You can run the tests by running `npm run test`.
 ```

-[See more examples of general repository microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)
+[See more examples of general microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -70,7 +70,7 @@ const sidebars: SidebarsConfig = {
            },
            {
              type: 'doc',
-              label: 'General Repository Microagents',
+              label: 'General Microagents',
              id: 'usage/prompting/microagents-repo',
            },
            {
@@ -78,6 +78,11 @@ const sidebars: SidebarsConfig = {
              label: 'Keyword-Triggered Microagents',
              id: 'usage/prompting/microagents-keyword',
            },
+            {
+              type: 'doc',
+              label: 'Organization and User Microagents',
+              id: 'usage/prompting/microagents-org',
+            },
            {
              type: 'doc',
              label: 'Global Microagents',
@@ -267,6 +272,11 @@ const sidebars: SidebarsConfig = {
          label: 'Evaluation',
          id: 'usage/how-to/evaluation-harness',
        },
+        {
+          type: 'doc',
+          label: 'WebSocket Connection',
+          id: 'usage/how-to/websocket-connection',
+        },
      ],
    },
    {
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -261,6 +261,7 @@ def get_config(
        enable_jupyter=False,
        enable_browsing=RUN_WITH_BROWSING,
        enable_llm_editor=False,
+        enable_mcp=False,
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
--- a/evaluation/benchmarks/visual_swe_bench/README.md
+++ b/evaluation/benchmarks/visual_swe_bench/README.md
@@ -0,0 +1,172 @@
+# Visual SWE-Bench Evaluation with Docker Image
+
+This folder contains the evaluation harness that we built on top of the original [Visual SWE-Bench benchmark](https://multi-swe-bench.github.io/#/) ([paper](https://arxiv.org/abs/2412.17315)).
+
+The evaluation consists of three steps:
+
+1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm), and [pull docker](#openhands-visual-swe-bench-instance-level-docker-support).
+2. [Run inference](#run-inference-on-visual-swe-bench-instances): Generate a edit patch for each Github issue.
+3. [Evaluate patches using Visual SWE-Bench docker](#evaluate-generated-patches).
+
+## Setup Environment and LLM Configuration
+
+Please follow instruction [here](../../README.md#setup) to setup your local development environment and LLM.
+
+## OpenHands Visual SWE-Bench Instance-level Docker Support
+
+OpenHands now support using the official evaluation docker for both **[inference](#run-inference-on-visual-swe-bench-instances) and [evaluation](#evaluate-generated-patches)**.
+This is now the default behavior.
+
+## Run Inference on Visual SWE-Bench Instances
+
+Make sure your Docker daemon is running, and you have ample disk space for the [instance-level docker image](#openhands-visual-swe-bench-instance-level-docker-support).
+
+When the `run_infer.sh` script is started, it will automatically pull the relevant Visual SWE-Bench images. For example, for instance ID `networkx__networkx-6503`, it will try to pull our pre-build docker image `sweb.eval.x86_64.networkx_s_networkx-6503` from DockerHub. This image will be used create an OpenHands runtime image where the agent will operate on.
+
+```bash
+./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers]
+
+# Example
+./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 133 30 1
+```
+
+where `model_config` is mandatory, and the rest are optional.
+
+- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
+LLM settings, as defined in your `config.toml`.
+- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
+like to evaluate. It could also be a release tag like `0.6.2`.
+- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
+to `CodeActAgent`.
+- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
+default, the script evaluates the entire Visual SWE-bench set (133 issues). Note:
+in order to use `eval_limit`, you must also set `agent`.
+- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
+default, it is set to 30.
+- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
+default, it is set to 1.
+
+There are also two optional environment variables you can set.
+
+```bash
+export USE_HINT_TEXT=true # if you want to use hint text in the evaluation. Default to false. Ignore this if you are not sure.
+export USE_INSTANCE_IMAGE=true # if you want to use instance-level docker images. Default to true
+```
+
+Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
+
+then your command would be:
+
+```bash
+./evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
+```
+
+### Specify a subset of tasks to run infer
+
+If you would like to specify a list of tasks you'd like to benchmark on, you could
+create a `config.toml` under `./evaluation/benchmarks/visual_swe_bench/` folder, and put a list
+attribute named `selected_ids`, e.g.
+
+```toml
+selected_ids = ['astropy__astropy-13838', 'matplotlib__matplotlib-21617', 'plotly__plotly.py-1966']
+```
+
+Then only these tasks (rows whose `instance_id` is in the above list) will be evaluated.
+In this case, `eval_limit` option applies to tasks that are in the `selected_ids` list.
+
+After running the inference, you will obtain a `output.jsonl` (by default it will be saved to `evaluation/evaluation_outputs`).
+
+## Evaluate Generated Patches
+
+### Download Docker Images
+
+**(Recommended for reproducibility)** If you have extra local space (e.g., 200GB), you can try pull the instance-level docker images we've prepared by running:
+
+```bash
+evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance
+```
+
+If you want to save disk space a bit, while speeding up the image pre-build process, you can pull the environment-level docker images:
+
+```bash
+evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh env
+```
+
+If you want to evaluate on the full SWE-Bench test set:
+
+```bash
+evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh instance full
+```
+
+### Run evaluation
+
+With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patches, and produce a fine-grained report.
+
+**This evaluation is performed using the official dockerized evaluation announced.**
+
+> If you want to evaluate existing results, you should first run this to clone existing outputs
+>
+>```bash
+>git clone https://huggingface.co/spaces/OpenHands/evaluation evaluation/evaluation_outputs
+>```
+
+NOTE, you should have already pulled the instance-level OR env-level docker images following [this section](#openhands-visual-swe-bench-instance-level-docker-support).
+
+Then you can run the following:
+
+```bash
+./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id]
+
+# Example
+./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/luolin101__Visual-SWE-bench-test/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
+```
+
+The script now accepts optional arguments:
+
+- `instance_id`: Specify a single instance to evaluate (optional)
+
+For example, to evaluate a specific instance with a custom dataset and split:
+
+```bash
+./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL instance_123
+```
+
+> You can also pass in a JSONL with SWE-Bench format to `./evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh`, where each line is a JSON of `{"model_patch": "XXX", "model_name_or_path": "YYY", "instance_id": "ZZZ"}`.
+
+The final results will be saved to `evaluation/evaluation_outputs/outputs/visual_swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/` with the following files/directory:
+
+- `README.md`: a report showing what are the instances that passed, failed, etc.
+- `report.json`: a JSON file that contains keys like `"resolved_ids"` pointing to instance IDs that are resolved by the agent.
+- `logs/`: a directory of test logs
+
+## Visualize Results
+
+First you need to clone `https://huggingface.co/spaces/OpenHands/evaluation` and add your own running results from openhands into the `outputs` of the cloned repo.
+
+```bash
+git clone https://huggingface.co/spaces/OpenHands/evaluation
+```
+
+**(optional) setup streamlit environment with conda**:
+
+```bash
+cd evaluation
+conda create -n streamlit python=3.10
+conda activate streamlit
+pip install -r requirements.txt
+```
+
+**run the visualizer**:
+Then, in a separate Python environment with `streamlit` library, you can run the following:
+
+```bash
+# Make sure you are inside the cloned `evaluation` repo
+conda activate streamlit # if you follow the optional conda env setup above
+streamlit app.py --server.port 8501 --server.address 0.0.0.0
+```
+
+Then you can access the SWE-Bench trajectory visualizer at `localhost:8501`.
+
+## Submit your evaluation results
+
+You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
--- a/evaluation/benchmarks/visual_swe_bench/init.py
+++ b/evaluation/benchmarks/visual_swe_bench/init.py
--- a/evaluation/benchmarks/visual_swe_bench/run_infer.py
+++ b/evaluation/benchmarks/visual_swe_bench/run_infer.py
@@ -0,0 +1,641 @@
+import asyncio
+import json
+import os
+import tempfile
+from typing import Any
+
+import pandas as pd
+import toml
+from datasets import load_dataset
+
+import openhands.agenthub
+from evaluation.benchmarks.swe_bench.resource.mapping import (
+    get_instance_resource_factor,
+)
+from evaluation.utils.shared import (
+    EvalException,
+    EvalMetadata,
+    EvalOutput,
+    assert_and_raise,
+    codeact_user_response,
+    get_default_sandbox_config_for_eval,
+    get_metrics,
+    is_fatal_evaluation_error,
+    make_metadata,
+    prepare_dataset,
+    reset_logger_for_multiprocessing,
+    run_evaluation,
+    update_llm_config_for_completions_logging,
+)
+from openhands.controller.state.state import State
+from openhands.core.config import (
+    AgentConfig,
+    AppConfig,
+    get_llm_config_arg,
+    get_parser,
+)
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.main import create_runtime, run_controller
+from openhands.events.action import CmdRunAction, MessageAction
+from openhands.events.observation import CmdOutputObservation, ErrorObservation
+from openhands.events.serialization.event import event_to_dict
+from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
+from openhands.utils.shutdown_listener import sleep_if_should_continue
+
+USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
+RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
+
+
+AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
+    'CodeActAgent': codeact_user_response,
+}
+
+
+def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
+    return f'{instance.repo}__{instance.version}'.replace('/', '__')
+
+
+def get_instruction(instance: pd.Series, metadata: EvalMetadata):
+    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
+    # Instruction based on Anthropic's official trajectory
+    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
+    instruction = (
+        '<uploaded_files>\n'
+        f'/workspace/{workspace_dir_name}\n'
+        '</uploaded_files>\n'
+        f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
+        f'<issue_description>\n'
+        f'{instance.problem_statement}\n'
+        '</issue_description>\n\n'
+        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
+        "I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
+        "Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
+        'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
+        'Follow these steps to resolve the issue:\n'
+        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
+        '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
+        '3. Edit the sourcecode of the repo to resolve the issue\n'
+        '4. Rerun your reproduce script and confirm that the error is fixed!\n'
+        '5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well\n'
+        f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
+        '   - The issue you are fixing\n'
+        '   - The files you modified\n'
+        '   - The functions you changed\n'
+        '   Make sure all these tests pass with your changes.\n'
+        "Your thinking should be thorough and so it's fine if it's very long.\n"
+    )
+
+    if RUN_WITH_BROWSING:
+        instruction += (
+            '<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
+        )
+    return instruction
+
+
+# TODO: migrate all swe-bench docker to ghcr.io/openhands
+DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
+logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
+
+
+def get_instance_docker_image(instance_id: str, official_image: bool = False) -> str:
+    image_name = 'sweb.eval.x86_64.' + instance_id
+    image_name = image_name.replace(
+        '__', '_s_'
+    )  # to comply with docker image naming convention
+    other_list = [
+        'plotly__plotly.py-4083',
+        'plotly__plotly.py-2600',
+        'plotly__plotly.py-2591',
+        'plotly__plotly.py-1966',
+        'networkx__networkx-6503',
+        'networkx__networkx-6098',
+        'networkx__networkx-5616',
+        'networkx__networkx-5354',
+        'networkx__networkx-5058',
+        'networkx__networkx-4378',
+        'networkx__networkx-3764',
+        'vega__altair-2785',
+        'vega__altair-1092',
+        'vega__altair-974',
+        'vega__altair-830',
+        'matplotlib__matplotlib-27754',
+        'matplotlib__matplotlib-26926',
+        'matplotlib__matplotlib-26788',
+        'matplotlib__matplotlib-26586',
+        'sympy__sympy-26941',
+        'mwaskom__seaborn-3458',
+        'mwaskom__seaborn-3454',
+    ]
+    if instance_id in other_list:
+        return ('docker.io/luolin101/'.rstrip('/') + '/' + image_name).lower()
+    return (DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name).lower()
+
+
+def get_config(
+    instance: pd.Series,
+    metadata: EvalMetadata,
+) -> AppConfig:
+    # We use a different instance image for the each instance of swe-bench eval
+    use_official_image = bool(
+        'verified' in metadata.dataset.lower() or 'lite' in metadata.dataset.lower()
+    )
+    base_container_image = get_instance_docker_image(
+        instance['instance_id'], use_official_image
+    )
+    logger.info(
+        f'Using instance container image: {base_container_image}. '
+        f'Please make sure this image exists. '
+        f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
+    )
+
+    sandbox_config = get_default_sandbox_config_for_eval()
+    sandbox_config.base_container_image = base_container_image
+    sandbox_config.enable_auto_lint = True
+    sandbox_config.use_host_network = False
+    # Add platform to the sandbox config to solve issue 4401
+    sandbox_config.platform = 'linux/amd64'
+    sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
+        dataset_name=metadata.dataset,
+        instance_id=instance['instance_id'],
+    )
+
+    config = AppConfig(
+        default_agent=metadata.agent_class,
+        run_as_openhands=False,
+        max_iterations=metadata.max_iterations,
+        runtime=os.environ.get('RUNTIME', 'docker'),
+        sandbox=sandbox_config,
+        # do not mount workspace
+        workspace_base=None,
+        workspace_mount_path=None,
+    )
+    config.set_llm_config(
+        update_llm_config_for_completions_logging(
+            metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
+        )
+    )
+    agent_config = AgentConfig(
+        enable_jupyter=False,
+        enable_browsing=RUN_WITH_BROWSING,
+        enable_llm_editor=False,
+        condenser=metadata.condenser_config,
+        enable_prompt_extensions=False,
+    )
+    config.set_agent_config(agent_config)
+    return config
+
+
+def initialize_runtime(
+    runtime: Runtime,
+    instance: pd.Series,  # this argument is not required
+):
+    """Initialize the runtime for the agent.
+
+    This function is called before the runtime is used to run the agent.
+    """
+    logger.info('-' * 30)
+    logger.info('BEGIN Runtime Initialization Fn')
+    logger.info('-' * 30)
+    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
+    obs: CmdOutputObservation
+
+    # Set instance id
+    action = CmdRunAction(
+        command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc"""
+    )
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
+    )
+
+    action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
+
+    # inject the init script
+    script_dir = os.path.dirname(__file__)
+
+    # inject the instance info
+    action = CmdRunAction(command='mkdir -p /swe_util/eval_data/instances')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0,
+        f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
+    )
+
+    swe_instance_json_name = 'swe-bench-instance.json'
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Construct the full path for the desired file name within the temporary directory
+        temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
+        # Write to the file with the desired name within the temporary directory
+        with open(temp_file_path, 'w') as f:
+            if not isinstance(instance, dict):
+                json.dump([instance.to_dict()], f)
+            else:
+                json.dump([instance], f)
+
+        # Copy the file to the desired location
+        runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')
+
+        # inject the instance swe entry
+        runtime.copy_to(
+            str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
+            '/swe_util/',
+        )
+
+    action = CmdRunAction(command='cat ~/.bashrc')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
+
+    action = CmdRunAction(command='source ~/.bashrc')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    if isinstance(obs, ErrorObservation):
+        logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
+    assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
+
+    action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0,
+        f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
+    )
+
+    action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0,
+        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
+    )
+
+    action = CmdRunAction(command='git reset --hard')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
+
+    action = CmdRunAction(
+        command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
+    )
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
+
+    action = CmdRunAction(command='which python')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        obs.exit_code == 0 and 'testbed' in obs.content,
+        f'Expected to find python interpreter from testbed, but got: {str(obs)}',
+    )
+
+    logger.info('-' * 30)
+    logger.info('END Runtime Initialization Fn')
+    logger.info('-' * 30)
+
+
+def complete_runtime(
+    runtime: Runtime,
+    instance: pd.Series,  # this argument is not required, but it is used to get the workspace_dir_name
+) -> dict[str, Any]:
+    """Complete the runtime for the agent.
+
+    This function is called before the runtime is used to run the agent.
+    If you need to do something in the sandbox to get the correctness metric after
+    the agent has run, modify this function.
+    """
+    logger.info('-' * 30)
+    logger.info('BEGIN Runtime Completion Fn')
+    logger.info('-' * 30)
+    obs: CmdOutputObservation
+    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
+
+    action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+    if obs.exit_code == -1:
+        # The previous command is still running
+        # We need to kill previous command
+        logger.info('The previous command is still running, trying to kill it...')
+        action = CmdRunAction(command='C-c')
+        obs = runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+        # Then run the command again
+        action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
+        action.set_hard_timeout(600)
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
+    assert_and_raise(
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+        f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
+    )
+
+    action = CmdRunAction(command='git config --global core.pager ""')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+        f'Failed to git config --global core.pager "": {str(obs)}',
+    )
+
+    # First check for any git repositories in subdirectories
+    action = CmdRunAction(command='find . -type d -name .git -not -path "./.git"')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+        f'Failed to find git repositories: {str(obs)}',
+    )
+
+    git_dirs = [p for p in obs.content.strip().split('\n') if p]
+    if git_dirs:
+        # Remove all .git directories in subdirectories
+        for git_dir in git_dirs:
+            action = CmdRunAction(command=f'rm -rf "{git_dir}"')
+            action.set_hard_timeout(600)
+            logger.info(action, extra={'msg_type': 'ACTION'})
+            obs = runtime.run_action(action)
+            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+            assert_and_raise(
+                isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+                f'Failed to remove git directory {git_dir}: {str(obs)}',
+            )
+
+    # add all files
+    action = CmdRunAction(command='git add -A')
+    action.set_hard_timeout(600)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert_and_raise(
+        isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
+        f'Failed to git add -A: {str(obs)}',
+    )
+
+    n_retries = 0
+    git_patch = None
+    while n_retries < 5:
+        action = CmdRunAction(
+            command=f'git diff --no-color --cached {instance["base_commit"]}'
+        )
+        action.set_hard_timeout(max(300 + 100 * n_retries, 600))
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        n_retries += 1
+        if isinstance(obs, CmdOutputObservation):
+            if obs.exit_code == 0:
+                git_patch = obs.content.strip()
+                break
+            else:
+                logger.info('Failed to get git diff, retrying...')
+                sleep_if_should_continue(10)
+        elif isinstance(obs, ErrorObservation):
+            logger.error(f'Error occurred: {obs.content}. Retrying...')
+            sleep_if_should_continue(10)
+        else:
+            assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
+
+    assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
+
+    logger.info('-' * 30)
+    logger.info('END Runtime Completion Fn')
+    logger.info('-' * 30)
+    return {'git_patch': git_patch}
+
+
+def process_instance(
+    instance: pd.Series,
+    metadata: EvalMetadata,
+    reset_logger: bool = True,
+    runtime_failure_count: int = 0,
+) -> EvalOutput:
+    config = get_config(instance, metadata)
+
+    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
+    if reset_logger:
+        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
+        reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
+    else:
+        logger.info(f'Starting evaluation for instance {instance.instance_id}.')
+
+    # Increase resource_factor with increasing attempt_id
+    if runtime_failure_count > 0:
+        config.sandbox.remote_runtime_resource_factor = min(
+            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
+            8,
+        )
+        logger.warning(
+            f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
+        )
+    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+
+    try:
+        initialize_runtime(runtime, instance)
+
+        instruction = get_instruction(instance, metadata)
+
+        # Here's how you can run the agent (similar to the `main` function) and get the final task state
+        state: State | None = asyncio.run(
+            run_controller(
+                config=config,
+                initial_user_action=MessageAction(content=instruction),
+                runtime=runtime,
+                fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
+                    metadata.agent_class
+                ],
+            )
+        )
+
+        # if fatal error, throw EvalError to trigger re-run
+        if is_fatal_evaluation_error(state.last_error):
+            raise EvalException('Fatal error detected: ' + state.last_error)
+
+        # ======= THIS IS SWE-Bench specific =======
+        # Get git patch
+        return_val = complete_runtime(runtime, instance)
+        git_patch = return_val['git_patch']
+        logger.info(
+            f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
+        )
+    finally:
+        runtime.close()
+    # ==========================================
+
+    # ======= Attempt to evaluate the agent's edits =======
+    # we use eval_infer.sh to evaluate the agent's edits, not here
+    # because the agent may alter the environment / testcases
+    test_result = {
+        'git_patch': git_patch,
+    }
+
+    # If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
+    # You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
+    if state is None:
+        raise ValueError('State should not be None.')
+
+    # NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
+    histories = [event_to_dict(event) for event in state.history]
+    metrics = get_metrics(state)
+
+    # Save the output
+    output = EvalOutput(
+        instance_id=instance.instance_id,
+        instruction=instruction,
+        instance=instance.to_dict(),  # SWE Bench specific
+        test_result=test_result,
+        metadata=metadata,
+        history=histories,
+        metrics=metrics,
+        error=state.last_error if state and state.last_error else None,
+    )
+    return output
+
+
+def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
+    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
+    if os.path.exists(file_path):
+        with open(file_path, 'r') as file:
+            data = toml.load(file)
+            if 'selected_ids' in data:
+                selected_ids = data['selected_ids']
+                logger.info(
+                    f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
+                )
+                subset = dataset[dataset[filter_column].isin(selected_ids)]
+                logger.info(f'Retained {subset.shape[0]} tasks after filtering')
+                return subset
+    skip_ids = os.environ.get('SKIP_IDS', '').split(',')
+    if len(skip_ids) > 0:
+        logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
+        return dataset[~dataset[filter_column].isin(skip_ids)]
+    return dataset
+
+
+# A list of instances that are known to be tricky to infer
+# (will cause runtime failure even with resource factor = 8)
+SWEGYM_EXCLUDE_IDS = [
+    'dask__dask-10422',
+    'pandas-dev__pandas-50548',
+    'pandas-dev__pandas-53672',
+    'pandas-dev__pandas-54174',
+    'pandas-dev__pandas-55518',
+    'pandas-dev__pandas-58383',
+    'pydata__xarray-6721',
+    'pytest-dev__pytest-10081',
+    'pytest-dev__pytest-7236',
+]
+
+if __name__ == '__main__':
+    parser = get_parser()
+    parser.add_argument(
+        '--dataset',
+        type=str,
+        default='princeton-nlp/SWE-bench',
+        help='data set to evaluate on, either full-test or lite-test',
+    )
+    parser.add_argument(
+        '--split',
+        type=str,
+        default='test',
+        help='split to evaluate on',
+    )
+    args, _ = parser.parse_known_args()
+
+    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
+    # so we don't need to manage file uploading to OpenHands's repo
+    dataset = load_dataset(args.dataset, split=args.split)
+    swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
+    logger.info(
+        f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
+    )
+    if 'SWE-Gym' in args.dataset:
+        swe_bench_tests = swe_bench_tests[
+            ~swe_bench_tests['instance_id'].isin(SWEGYM_EXCLUDE_IDS)
+        ]
+        logger.info(
+            f'{len(swe_bench_tests)} tasks left after excluding SWE-Gym excluded tasks'
+        )
+
+    llm_config = None
+    if args.llm_config:
+        llm_config = get_llm_config_arg(args.llm_config)
+        llm_config.log_completions = True
+        # modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
+        llm_config.modify_params = False
+
+    if llm_config is None:
+        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
+
+    details = {}
+    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
+
+    dataset_descrption = (
+        args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
+    )
+    metadata = make_metadata(
+        llm_config,
+        dataset_descrption,
+        args.agent_cls,
+        args.max_iterations,
+        args.eval_note,
+        args.eval_output_dir,
+        details=details,
+    )
+
+    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
+    print(f'### OUTPUT FILE: {output_file} ###')
+    instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)
+
+    if len(instances) > 0 and not isinstance(
+        instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
+    ):
+        for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
+            instances[col] = instances[col].apply(lambda x: str(x))
+
+    run_evaluation(
+        instances,
+        metadata,
+        output_file,
+        args.eval_num_workers,
+        process_instance,
+        timeout_seconds=8 * 60 * 60,  # 8 hour PER instance should be more than enough
+        max_retries=5,
+    )
--- a/evaluation/benchmarks/visual_swe_bench/scripts/docker/all-visualswebench-full-instance-images.txt
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/docker/all-visualswebench-full-instance-images.txt
@@ -0,0 +1,157 @@
+xingyaoww/sweb.eval.x86_64.astropy_s_astropy-11693:latest
+xingyaoww/sweb.eval.x86_64.astropy_s_astropy-13838:latest
+xingyaoww/sweb.eval.x86_64.astropy_s_astropy-14295:latest
+xingyaoww/sweb.eval.x86_64.astropy_s_astropy-8292:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13908:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13980:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13983:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-13984:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14043:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-14623:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-19763:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20470:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20518:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20584:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20761:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-20826:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21443:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21490:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21550:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21568:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-21617:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22865:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22871:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-22931:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23047:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23111:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-23412:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24088:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24177:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24189:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24570:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24691:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24749:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24768:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24849:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24870:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-24971:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25287:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25334:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25340:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25346:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25405:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25499:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25565:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25640:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25667:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25779:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26078:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-26466:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2576:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2846:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-2979:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3180:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3187:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3202:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3216:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3217:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3276:latest
+xingyaoww/sweb.eval.x86_64.mwaskom_s_seaborn-3394:latest
+xingyaoww/sweb.eval.x86_64.pydata_s_xarray-4182:latest
+xingyaoww/sweb.eval.x86_64.pydata_s_xarray-5682:latest
+xingyaoww/sweb.eval.x86_64.pylint-dev_s_pylint-4551:latest
+xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13087:latest
+xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-13618:latest
+xingyaoww/sweb.eval.x86_64.scikit-learn_s_scikit-learn-14067:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10048:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10097:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10191:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-10435:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11266:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-11502:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7615:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-7757:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8028:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8056:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8075:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8120:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8265:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8278:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8620:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8621:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8638:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-8658:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9229:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9230:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9289:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9320:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9350:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9464:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9673:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9698:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9797:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9982:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9987:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9997:latest
+xingyaoww/sweb.eval.x86_64.sphinx-doc_s_sphinx-9999:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11787:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-11788:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13264:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-13840:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15151:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15304:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15625:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-15976:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-16003:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17067:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-17115:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-18922:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-21769:latest
+xingyaoww/sweb.eval.x86_64.sympy_s_sympy-24723:latest
+luolin101/sweb.eval.x86_64.plotly_s_plotly.py-4083:latest
+luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2600:latest
+luolin101/sweb.eval.x86_64.plotly_s_plotly.py-2591:latest
+luolin101/sweb.eval.x86_64.plotly_s_plotly.py-1966:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-6503:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-6098:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-5616:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-5354:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-5058:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-4378:latest
+luolin101/sweb.eval.x86_64.networkx_s_networkx-3764:latest
+luolin101/sweb.eval.x86_64.vega_s_altair-2785:latest
+luolin101/sweb.eval.x86_64.vega_s_altair-1092:latest
+luolin101/sweb.eval.x86_64.vega_s_altair-974:latest
+luolin101/sweb.eval.x86_64.vega_s_altair-830:latest
+luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-27754:latest
+luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26926:latest
+luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26788:latest
+luolin101/sweb.eval.x86_64.matplotlib_s_matplotlib-26586:latest
+luolin101/sweb.eval.x86_64.sympy_s_sympy-26941:latest
+luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3458:latest
+luolin101/sweb.eval.x86_64.mwaskom_s_seaborn-3454:latest
+xingyaoww/sweb.eval.x86_64.matplotlib_s_matplotlib-25631:latest
+xingyaoww/sweb.env.x86_64.428468730904ff6b4232aa:latest
+xingyaoww/sweb.env.x86_64.89a9e6df7ab7bcb9e010c8:latest
+xingyaoww/sweb.env.x86_64.15374367de368534f261e3:latest
+xingyaoww/sweb.env.x86_64.6b007979cf533f0f3016e8:latest
+xingyaoww/sweb.env.x86_64.b382c45e0a94d34ef0fc86:latest
+xingyaoww/sweb.env.x86_64.7037e8c448a4b8ebfe9b13:latest
+xingyaoww/sweb.env.x86_64.31244378a92e3bcce809ac:latest
+xingyaoww/sweb.env.x86_64.efa6065ed5bf204410fd53:latest
+xingyaoww/sweb.env.x86_64.a0efca7a0fe6719dbf65c2:latest
+xingyaoww/sweb.env.x86_64.502d8fc6ebccd881244091:latest
+luolin101/sweb.env.x86_64.eb002359cfcbe2edb56088:latest
+xingyaoww/sweb.env.x86_64.d905bb51fb68acc5d4221b:latest
+xingyaoww/sweb.env.x86_64.aa92880033da20ca313928:latest
+luolin101/sweb.env.x86_64.c6d251a05e0af7688b64fd:latest
+xingyaoww/sweb.env.x86_64.c795f4b88616b8462021ed:latest
+luolin101/sweb.env.x86_64.1e5a06e76ee016d067d77e:latest
+luolin101/sweb.env.x86_64.2e03d8e4d4bd373937a9ef:latest
+luolin101/sweb.env.x86_64.4c16026920d27ea78f3b7a:latest
+luolin101/sweb.env.x86_64.d15120dfdbda9831e9646b:latest
+luolin101/sweb.env.x86_64.c581ba273c3275679773dd:latest
+luolin101/sweb.env.x86_64.dc800a1bbe275c5de0c4aa:latest
+luolin101/sweb.env.x86_64.59bd7d84a0939c7caba7e6:latest
+xingyaoww/sweb.env.x86_64.0d80c7dec81ee2f2f513e2:latest
+xingyaoww/sweb.base.x86_64:latest
--- a/evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/docker/pull_all_eval_docker.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -e
+
+LEVEL=$1
+# three levels:
+# - base, keyword "sweb.base"
+# - env, keyword "sweb.env"
+# - instance, keyword "sweb.eval"
+SET=$2
+
+if [ -z "$LEVEL" ]; then
+    echo "Usage: $0 <cache_level> <set>"
+    echo "cache_level: base, env, or instance"
+    echo "set: lite, full"
+    exit 1
+fi
+
+if [ -z "$SET" ]; then
+    echo "Usage: $0 <cache_level> <set>"
+    echo "cache_level: base, env, or instance"
+    echo "set: lite, full, default is lite"
+    SET="lite"
+fi
+
+
+if [ "$SET" == "full" ]; then
+    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
+else
+    IMAGE_FILE="$(dirname "$0")/all-visualswebench-full-instance-images.txt"
+fi
+
+# Define a pattern based on the level
+case $LEVEL in
+    base)
+        PATTERN="sweb.base"
+        ;;
+    env)
+        PATTERN="sweb.base\|sweb.env"
+        ;;
+    instance)
+        PATTERN="sweb.base\|sweb.env\|sweb.eval"
+        ;;
+    *)
+        echo "Invalid cache level: $LEVEL"
+        echo "Valid levels are: base, env, instance"
+        exit 1
+        ;;
+esac
+
+echo "Pulling docker images for [$LEVEL] level"
+
+echo "Pattern: $PATTERN"
+echo "Image file: $IMAGE_FILE"
+
+# Read each line from the file, filter by pattern, and pull the docker image
+grep "$PATTERN" "$IMAGE_FILE" | while IFS= read -r image; do
+    echo "Pulling $image into $image"
+    docker pull $image
+    # replace _s_ to __ in the image name
+    renamed_image=$(echo "$image" | sed 's|.*/||; s/_s_/__/g')
+    docker tag $image $renamed_image
+done
--- a/evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+
+PROCESS_FILEPATH=$1
+if [ -z "$PROCESS_FILEPATH" ]; then
+    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
+    exit 1
+fi
+
+if [ ! -f $PROCESS_FILEPATH ]; then
+    echo "Error: $PROCESS_FILEPATH is not a file"
+    exit 1
+fi
+
+# If instance_id is empty, it means we want to eval on the whole $PROCESS_FILEPATH
+# otherwise, we want to eval on the instance_id
+INSTANCE_ID=$2
+DATASET_NAME=${3:-"luolin101/Visual-SWE-bench"}
+SPLIT=${4:-"test"}
+
+echo "INSTANCE_ID: $INSTANCE_ID"
+echo "DATASET_NAME: $DATASET_NAME"
+echo "SPLIT: $SPLIT"
+
+PROCESS_FILEPATH=$(realpath $PROCESS_FILEPATH)
+FILE_DIR=$(dirname $PROCESS_FILEPATH)
+FILE_NAME=$(basename $PROCESS_FILEPATH)
+
+echo "Evaluating $FILE_NAME @ $FILE_DIR"
+
+# ================================================
+# detect whether PROCESS_FILEPATH is in OH format or in SWE-bench format
+echo "=============================================================="
+echo "Detecting whether PROCESS_FILEPATH is in OH format or in SWE-bench format"
+echo "=============================================================="
+# SWE-bench format is a JSONL where every line has three fields: model_name_or_path, instance_id, and model_patch
+function is_swebench_format() {
+    # Read the first line of the file
+    read -r first_line < "$PROCESS_FILEPATH"
+
+    # Use jq to check if the first line has the required fields
+    echo "$first_line" | jq -e '. | has("model_name_or_path") and has("instance_id") and has("model_patch")' > /dev/null
+
+    if [ $? -ne 0 ]; then
+        return 1 # Return 1 if the first line does not have the required fields
+    fi
+
+    return 0 # Return 0 if the first line has the required fields
+}
+# Call the function with the file path
+is_swebench_format "$PROCESS_FILEPATH"
+IS_SWEBENCH_FORMAT=$?
+# Use the result in an if-else statement
+if [ $IS_SWEBENCH_FORMAT -eq 0 ]; then
+    echo "The file IS in SWE-bench format."
+    SWEBENCH_FORMAT_JSONL=$PROCESS_FILEPATH
+else
+    echo "The file IS NOT in SWE-bench format."
+
+    # ==== Convert OH format to SWE-bench format ====
+    echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
+    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
+    # replace .jsonl with .swebench.jsonl in filename
+    SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
+    echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
+    # assert that the file exists
+    if [ ! -f $SWEBENCH_FORMAT_JSONL ]; then
+        echo "Error: $SWEBENCH_FORMAT_JSONL does not exist. There is probably an error in the conversion process."
+        exit 1
+    fi
+    SWEBENCH_FORMAT_JSONL=$(realpath $SWEBENCH_FORMAT_JSONL)
+fi
+# ================================================
+
+echo "=============================================================="
+echo "Running SWE-bench evaluation"
+echo "=============================================================="
+
+RUN_ID=$(date +"%Y%m%d_%H%M%S")
+N_PROCESS=16
+
+if [ -z "$INSTANCE_ID" ]; then
+    echo "Running SWE-bench evaluation on the whole input file..."
+    # Default to SWE-Bench-lite
+    # change `--dataset_name` and `--split` to alter dataset
+
+    poetry run python -m visualswebench.harness.run_evaluation \
+        --dataset_name "$DATASET_NAME" \
+        --split "$SPLIT" \
+        --predictions_path $SWEBENCH_FORMAT_JSONL \
+        --timeout 1800 \
+        --cache_level instance \
+        --max_workers $N_PROCESS \
+        --run_id $RUN_ID
+
+    # get the "model_name_or_path" from the first line of the SWEBENCH_FORMAT_JSONL
+    MODEL_NAME_OR_PATH=$(jq -r '.model_name_or_path' $SWEBENCH_FORMAT_JSONL | head -n 1)
+    echo "MODEL_NAME_OR_PATH: $MODEL_NAME_OR_PATH"
+
+    RESULT_OUTPUT_DIR=$(dirname $SWEBENCH_FORMAT_JSONL)
+    echo "RESULT_OUTPUT_DIR: $RESULT_OUTPUT_DIR"
+
+    # move the eval results to the target directory
+    mkdir -p $RESULT_OUTPUT_DIR
+    # rm eval_outputs directory if it exists
+    if [ -d $RESULT_OUTPUT_DIR/eval_outputs ]; then
+        rm -rf $RESULT_OUTPUT_DIR/eval_outputs
+    fi
+
+    mv logs/run_evaluation/$RUN_ID/$MODEL_NAME_OR_PATH $RESULT_OUTPUT_DIR
+    mv $RESULT_OUTPUT_DIR/$MODEL_NAME_OR_PATH $RESULT_OUTPUT_DIR/eval_outputs
+    echo "RUN_ID: $RUN_ID" > $RESULT_OUTPUT_DIR/run_id.txt
+
+    # move report file
+    REPORT_PATH=$MODEL_NAME_OR_PATH.$RUN_ID.json
+    if [ -f $REPORT_PATH ]; then
+        # check if $RESULT_OUTPUT_DIR/report.json exists
+        if [ -f $RESULT_OUTPUT_DIR/report.json ]; then
+            echo "Report file $RESULT_OUTPUT_DIR/report.json already exists. Overwriting..."
+            if [ -f $RESULT_OUTPUT_DIR/report.json.bak ]; then
+                rm $RESULT_OUTPUT_DIR/report.json.bak
+            fi
+            mv $RESULT_OUTPUT_DIR/report.json $RESULT_OUTPUT_DIR/report.json.bak
+        fi
+
+        mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
+    fi
+
+    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
+
+else
+    echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
+    poetry run python -m visualswebench.harness.run_evaluation \
+        --dataset_name "$DATASET_NAME" \
+        --split "$SPLIT" \
+        --predictions_path $SWEBENCH_FORMAT_JSONL \
+        --timeout 1800 \
+        --instance_ids $INSTANCE_ID \
+        --cache_level instance \
+        --max_workers $N_PROCESS \
+        --run_id $RUN_ID
+fi
--- a/evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+set -eo pipefail
+
+source "evaluation/utils/version_control.sh"
+
+MODEL_CONFIG=$1
+COMMIT_HASH=$2
+AGENT=$3
+EVAL_LIMIT=$4
+MAX_ITER=$5
+NUM_WORKERS=$6
+DATASET=$7
+SPLIT=$8
+N_RUNS=$9
+
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
+checkout_eval_branch
+
+if [ -z "$AGENT" ]; then
+  echo "Agent not specified, use default CodeActAgent"
+  AGENT="CodeActAgent"
+fi
+
+if [ -z "$MAX_ITER" ]; then
+  echo "MAX_ITER not specified, use default 100"
+  MAX_ITER=100
+fi
+
+if [ -z "$USE_INSTANCE_IMAGE" ]; then
+  echo "USE_INSTANCE_IMAGE not specified, use default true"
+  USE_INSTANCE_IMAGE=true
+fi
+
+if [ -z "$RUN_WITH_BROWSING" ]; then
+  echo "RUN_WITH_BROWSING not specified, use default false"
+  RUN_WITH_BROWSING=false
+fi
+
+
+if [ -z "$DATASET" ]; then
+  echo "DATASET not specified, use default luolin101/Visual-SWE-bench"
+  DATASET="luolin101/Visual-SWE-bench"
+fi
+
+if [ -z "$SPLIT" ]; then
+  echo "SPLIT not specified, use default test"
+  SPLIT="test"
+fi
+
+export USE_INSTANCE_IMAGE=$USE_INSTANCE_IMAGE
+echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
+export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
+echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
+
+get_openhands_version
+
+echo "AGENT: $AGENT"
+echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
+echo "MODEL_CONFIG: $MODEL_CONFIG"
+echo "DATASET: $DATASET"
+echo "SPLIT: $SPLIT"
+
+# Default to NOT use Hint
+if [ -z "$USE_HINT_TEXT" ]; then
+  export USE_HINT_TEXT=false
+fi
+echo "USE_HINT_TEXT: $USE_HINT_TEXT"
+EVAL_NOTE="$OPENHANDS_VERSION"
+# if not using Hint, add -no-hint to the eval note
+if [ "$USE_HINT_TEXT" = false ]; then
+  EVAL_NOTE="$EVAL_NOTE-no-hint"
+fi
+
+if [ "$RUN_WITH_BROWSING" = true ]; then
+  EVAL_NOTE="$EVAL_NOTE-with-browsing"
+fi
+
+if [ -n "$EXP_NAME" ]; then
+  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
+fi
+
+function run_eval() {
+  local eval_note=$1
+  COMMAND="poetry run python evaluation/benchmarks/visual_swe_bench/run_infer.py \
+    --agent-cls $AGENT \
+    --llm-config $MODEL_CONFIG \
+    --max-iterations $MAX_ITER \
+    --eval-num-workers $NUM_WORKERS \
+    --eval-note $eval_note \
+    --dataset $DATASET \
+    --split $SPLIT"
+
+  if [ -n "$EVAL_LIMIT" ]; then
+    echo "EVAL_LIMIT: $EVAL_LIMIT"
+    COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
+  fi
+
+  # Run the command
+  eval $COMMAND
+}
+
+unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
+if [ -z "$N_RUNS" ]; then
+  N_RUNS=1
+  echo "N_RUNS not specified, use default $N_RUNS"
+fi
+
+for i in $(seq 1 $N_RUNS); do
+  current_eval_note="$EVAL_NOTE-run_$i"
+  echo "EVAL_NOTE: $current_eval_note"
+  run_eval $current_eval_note
+done
+
+checkout_original_branch
--- a/evaluation/benchmarks/visual_swe_bench/scripts/setup/instance_swe_entry.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/setup/instance_swe_entry.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+source ~/.bashrc
+SWEUTIL_DIR=/swe_util
+
+# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
+# SWE_INSTANCE_ID=django__django-11099
+if [ -z "$SWE_INSTANCE_ID" ]; then
+    echo "Error: SWE_INSTANCE_ID is not set." >&2
+    exit 1
+fi
+
+# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
+item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json)
+
+if [[ -z "$item" ]]; then
+  echo "No item found for the provided instance ID."
+  exit 1
+fi
+
+WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')
+
+echo "WORKSPACE_NAME: $WORKSPACE_NAME"
+
+# Clear the workspace
+if [ -d /workspace ]; then
+    rm -rf /workspace/*
+else
+    mkdir /workspace
+fi
+# Copy repo to workspace
+if [ -d /workspace/$WORKSPACE_NAME ]; then
+    rm -rf /workspace/$WORKSPACE_NAME
+fi
+mkdir -p /workspace
+cp -r /testbed /workspace/$WORKSPACE_NAME
+
+# Activate instance-specific environment
+. /opt/miniconda3/etc/profile.d/conda.sh
+conda activate testbed
--- a/frontend/tests/components/chat-message.test.tsx
+++ b/frontend/tests/components/chat-message.test.tsx
@@ -10,11 +10,7 @@ describe("ChatMessage", () => {
    expect(screen.getByText("Hello, World!")).toBeInTheDocument();
  });

-  it("should render an assistant message", () => {
-    render(<ChatMessage type="assistant" message="Hello, World!" />);
-    expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
-    expect(screen.getByText("Hello, World!")).toBeInTheDocument();
-  });
+  it.todo("should render an assistant message");

  it.skip("should support code syntax highlighting", () => {
    const code = "```js\nconsole.log('Hello, World!')\n```";
@@ -66,10 +62,7 @@ describe("ChatMessage", () => {

  it("should apply correct styles to inline code", () => {
    render(
-      <ChatMessage
-        type="assistant"
-        message="Here is some `inline code` text"
-      />,
+      <ChatMessage type="agent" message="Here is some `inline code` text" />,
    );
    const codeElement = screen.getByText("inline code");

--- a/frontend/tests/components/chat/chat-interface.test.tsx
+++ b/frontend/tests/components/chat/chat-interface.test.tsx
@@ -1,11 +1,9 @@
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
-import { act, screen, waitFor, within } from "@testing-library/react";
+import { screen, waitFor, within } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { renderWithProviders } from "test-utils";
 import type { Message } from "#/message";
-import { addUserMessage } from "#/state/chat-slice";
 import { SUGGESTIONS } from "#/utils/suggestions";
-import * as ChatSlice from "#/state/chat-slice";
 import { WsClientProviderStatus } from "#/context/ws-client-provider";
 import { ChatInterface } from "#/components/features/chat/chat-interface";

@@ -42,51 +40,10 @@ describe("Empty state", () => {
    vi.clearAllMocks();
  });

-  it("should render suggestions if empty", () => {
-    const { store } = renderWithProviders(<ChatInterface />, {
-      preloadedState: {
-        chat: {
-          messages: [],
-          systemMessage: {
-            content: "",
-            tools: [],
-            openhands_version: null,
-            agent_class: null
-          }
-        },
-      },
-    });
-
-    expect(screen.getByTestId("suggestions")).toBeInTheDocument();
-
-    act(() => {
-      store.dispatch(
-        addUserMessage({
-          content: "Hello",
-          imageUrls: [],
-          timestamp: new Date().toISOString(),
-          pending: true,
-        }),
-      );
-    });
-
-    expect(screen.queryByTestId("suggestions")).not.toBeInTheDocument();
-  });
+  it.todo("should render suggestions if empty");

  it("should render the default suggestions", () => {
-    renderWithProviders(<ChatInterface />, {
-      preloadedState: {
-        chat: {
-          messages: [],
-          systemMessage: {
-            content: "",
-            tools: [],
-            openhands_version: null,
-            agent_class: null
-          }
-        },
-      },
-    });
+    renderWithProviders(<ChatInterface />);

    const suggestions = screen.getByTestId("suggestions");
    const repoSuggestions = Object.keys(SUGGESTIONS.repo);
@@ -110,21 +67,8 @@ describe("Empty state", () => {
        status: WsClientProviderStatus.CONNECTED,
        isLoadingMessages: false,
      }));
-      const addUserMessageSpy = vi.spyOn(ChatSlice, "addUserMessage");
      const user = userEvent.setup();
-      const { store } = renderWithProviders(<ChatInterface />, {
-        preloadedState: {
-          chat: {
-            messages: [],
-            systemMessage: {
-              content: "",
-              tools: [],
-              openhands_version: null,
-              agent_class: null
-            }
-          },
-        },
-      });
+      renderWithProviders(<ChatInterface />);

      const suggestions = screen.getByTestId("suggestions");
      const displayedSuggestions = within(suggestions).getAllByRole("button");
@@ -133,9 +77,7 @@ describe("Empty state", () => {
      await user.click(displayedSuggestions[0]);

      // user message loaded to input
-      expect(addUserMessageSpy).not.toHaveBeenCalled();
      expect(screen.queryByTestId("suggestions")).toBeInTheDocument();
-      expect(store.getState().chat.messages).toHaveLength(0);
      expect(input).toHaveValue(displayedSuggestions[0].textContent);
    },
  );
@@ -149,19 +91,7 @@ describe("Empty state", () => {
        isLoadingMessages: false,
      }));
      const user = userEvent.setup();
-      const { rerender } = renderWithProviders(<ChatInterface />, {
-        preloadedState: {
-          chat: {
-            messages: [],
-            systemMessage: {
-              content: "",
-              tools: [],
-              openhands_version: null,
-              agent_class: null
-            }
-          },
-        },
-      });
+      const { rerender } = renderWithProviders(<ChatInterface />);

      const suggestions = screen.getByTestId("suggestions");
      const displayedSuggestions = within(suggestions).getAllByRole("button");
--- a/frontend/tests/components/context-menu/account-settings-context-menu.test.tsx
+++ b/frontend/tests/components/context-menu/account-settings-context-menu.test.tsx
@@ -20,7 +20,6 @@ describe("AccountSettingsContextMenu", () => {
      <AccountSettingsContextMenu
        onLogout={onLogoutMock}
        onClose={onCloseMock}
-        isLoggedIn
      />,
    );

@@ -35,7 +34,6 @@ describe("AccountSettingsContextMenu", () => {
      <AccountSettingsContextMenu
        onLogout={onLogoutMock}
        onClose={onCloseMock}
-        isLoggedIn
      />,
    );

@@ -45,19 +43,18 @@ describe("AccountSettingsContextMenu", () => {
    expect(onLogoutMock).toHaveBeenCalledOnce();
  });

-  test("onLogout should be disabled if the user is not logged in", async () => {
+  test("logout button is always enabled", async () => {
    render(
      <AccountSettingsContextMenu
        onLogout={onLogoutMock}
        onClose={onCloseMock}
-        isLoggedIn={false}
      />,
    );

    const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
    await user.click(logoutOption);

-    expect(onLogoutMock).not.toHaveBeenCalled();
+    expect(onLogoutMock).toHaveBeenCalledOnce();
  });

  it("should call onClose when clicking outside of the element", async () => {
@@ -65,7 +62,6 @@ describe("AccountSettingsContextMenu", () => {
      <AccountSettingsContextMenu
        onLogout={onLogoutMock}
        onClose={onCloseMock}
-        isLoggedIn
      />,
    );

--- a/frontend/tests/components/features/home/repo-connector.test.tsx
+++ b/frontend/tests/components/features/home/repo-connector.test.tsx
@@ -22,7 +22,7 @@ const renderRepoConnector = () => {
      path: "/conversations/:conversationId",
    },
    {
-      Component: Outlet,
+      Component: () => <Outlet />,
      path: "/settings",
      children: [
        {
--- a/frontend/tests/components/features/home/task-suggestions.test.tsx
+++ b/frontend/tests/components/features/home/task-suggestions.test.tsx
@@ -11,7 +11,7 @@ import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
 const renderTaskSuggestions = () => {
  const RouterStub = createRoutesStub([
    {
-      Component: TaskSuggestions,
+      Component: () => <TaskSuggestions />,
      path: "/",
    },
    {
--- a/frontend/tests/components/file-operations.test.tsx
+++ b/frontend/tests/components/file-operations.test.tsx
@@ -1,92 +1,11 @@
-import { render, screen } from "@testing-library/react";
-import { describe, it, expect, vi } from "vitest";
-import { Messages } from "#/components/features/chat/messages";
-import type { Message } from "#/message";
-import { renderWithProviders } from "test-utils";
-
-// Mock the useParams hook to provide a conversationId
-vi.mock("react-router", async () => {
-  const actual = await vi.importActual<typeof import("react-router")>("react-router");
-  return {
-    ...actual,
-    useParams: () => ({ conversationId: "test-conversation-id" }),
-  };
-});
+import { describe, it } from "vitest";

 describe("File Operations Messages", () => {
-  it("should show success indicator for successful file read operation", () => {
-    const messages: Message[] = [
-      {
-        type: "action",
-        translationID: "read_file_contents",
-        content: "Successfully read file contents",
-        success: true,
-        sender: "assistant",
-        timestamp: new Date().toISOString(),
-      },
-    ];
+  it.todo("should show success indicator for successful file read operation");

-    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
+  it.todo("should show failure indicator for failed file read operation");

-    const statusIcon = screen.getByTestId("status-icon");
-    expect(statusIcon).toBeInTheDocument();
-    expect(statusIcon.closest("svg")).toHaveClass("fill-success");
-  });
+  it.todo("should show success indicator for successful file edit operation");

-  it("should show failure indicator for failed file read operation", () => {
-    const messages: Message[] = [
-      {
-        type: "action",
-        translationID: "read_file_contents",
-        content: "Failed to read file contents",
-        success: false,
-        sender: "assistant",
-        timestamp: new Date().toISOString(),
-      },
-    ];
-
-    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
-
-    const statusIcon = screen.getByTestId("status-icon");
-    expect(statusIcon).toBeInTheDocument();
-    expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
-  });
-
-  it("should show success indicator for successful file edit operation", () => {
-    const messages: Message[] = [
-      {
-        type: "action",
-        translationID: "edit_file_contents",
-        content: "Successfully edited file contents",
-        success: true,
-        sender: "assistant",
-        timestamp: new Date().toISOString(),
-      },
-    ];
-
-    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
-
-    const statusIcon = screen.getByTestId("status-icon");
-    expect(statusIcon).toBeInTheDocument();
-    expect(statusIcon.closest("svg")).toHaveClass("fill-success");
-  });
-
-  it("should show failure indicator for failed file edit operation", () => {
-    const messages: Message[] = [
-      {
-        type: "action",
-        translationID: "edit_file_contents",
-        content: "Failed to edit file contents",
-        success: false,
-        sender: "assistant",
-        timestamp: new Date().toISOString(),
-      },
-    ];
-
-    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);
-
-    const statusIcon = screen.getByTestId("status-icon");
-    expect(statusIcon).toBeInTheDocument();
-    expect(statusIcon.closest("svg")).toHaveClass("fill-danger");
-  });
+  it.todo("should show failure indicator for failed file edit operation");
 });
--- a/frontend/tests/components/user-actions.test.tsx
+++ b/frontend/tests/components/user-actions.test.tsx
@@ -57,7 +57,7 @@ describe("UserActions", () => {
    ).not.toBeInTheDocument();
  });

-  test("onLogout should not be called when the user is not logged in", async () => {
+  test("logout button is always enabled", async () => {
    render(<UserActions onLogout={onLogoutMock} />);

    const userAvatar = screen.getByTestId("user-avatar");
@@ -66,6 +66,6 @@ describe("UserActions", () => {
    const logoutOption = screen.getByText("ACCOUNT_SETTINGS$LOGOUT");
    await user.click(logoutOption);

-    expect(onLogoutMock).not.toHaveBeenCalled();
+    expect(onLogoutMock).toHaveBeenCalledOnce();
  });
 });
--- a/frontend/tests/context/ws-client-provider.test.tsx
+++ b/frontend/tests/context/ws-client-provider.test.tsx
@@ -2,7 +2,6 @@ import { describe, it, expect, vi, beforeEach } from "vitest";
 import { render, waitFor } from "@testing-library/react";
 import React from "react";
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
-import * as ChatSlice from "#/state/chat-slice";
 import {
  updateStatusWhenErrorMessagePresent,
  WsClientProvider,
@@ -11,42 +10,15 @@ import {

 describe("Propagate error message", () => {
  it("should do nothing when no message was passed from server", () => {
-    const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
    updateStatusWhenErrorMessagePresent(null);
    updateStatusWhenErrorMessagePresent(undefined);
    updateStatusWhenErrorMessagePresent({});
    updateStatusWhenErrorMessagePresent({ message: null });
-
-    expect(addErrorMessageSpy).not.toHaveBeenCalled();
  });

-  it("should display error to user when present", () => {
-    const message = "We have a problem!";
-    const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
-    updateStatusWhenErrorMessagePresent({ message });
+  it.todo("should display error to user when present");

-    expect(addErrorMessageSpy).toHaveBeenCalledWith({
-      message,
-      status_update: true,
-      type: "error",
-    });
-  });
-
-  it("should display error including translation id when present", () => {
-    const message = "We have a problem!";
-    const addErrorMessageSpy = vi.spyOn(ChatSlice, "addErrorMessage");
-    updateStatusWhenErrorMessagePresent({
-      message,
-      data: { msg_id: "..id.." },
-    });
-
-    expect(addErrorMessageSpy).toHaveBeenCalledWith({
-      message,
-      id: "..id..",
-      status_update: true,
-      type: "error",
-    });
-  });
+  it.todo("should display error including translation id when present");
 });

 // Create a mock for socket.io-client
--- a/frontend/tests/i18n/translations.test.tsx
+++ b/frontend/tests/i18n/translations.test.tsx
@@ -11,7 +11,6 @@ describe("Translations", () => {
      <AccountSettingsContextMenu
        onLogout={() => {}}
        onClose={() => {}}
-        isLoggedIn
      />,
    );
    expect(
--- a/frontend/tests/routes/secrets-settings.test.tsx
+++ b/frontend/tests/routes/secrets-settings.test.tsx
@@ -22,7 +22,7 @@ const MOCK_GET_SECRETS_RESPONSE: GetSecretsResponse["custom_secrets"] = [

 const RouterStub = createRoutesStub([
  {
-    Component: Outlet,
+    Component: () => <Outlet />,
    path: "/settings",
    children: [
      {
--- a/frontend/tests/services/actions.test.ts
+++ b/frontend/tests/services/actions.test.ts
@@ -1,146 +0,0 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-import { handleStatusMessage, handleActionMessage } from "#/services/actions";
-import store from "#/store";
-import { trackError } from "#/utils/error-handler";
-import ActionType from "#/types/action-type";
-import { ActionMessage } from "#/types/message";
-
-// Mock dependencies
-vi.mock("#/utils/error-handler", () => ({
-  trackError: vi.fn(),
-}));
-
-vi.mock("#/store", () => ({
-  default: {
-    dispatch: vi.fn(),
-  },
-}));
-
-describe("Actions Service", () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  describe("handleStatusMessage", () => {
-    it("should dispatch info messages to status state", () => {
-      const message = {
-        type: "info",
-        message: "Runtime is not available",
-        id: "runtime.unavailable",
-        status_update: true as const,
-      };
-
-      handleStatusMessage(message);
-
-      expect(store.dispatch).toHaveBeenCalledWith(expect.objectContaining({
-        payload: message,
-      }));
-    });
-
-    it("should log error messages and display them in chat", () => {
-      const message = {
-        type: "error",
-        message: "Runtime connection failed",
-        id: "runtime.connection.failed",
-        status_update: true as const,
-      };
-
-      handleStatusMessage(message);
-
-      expect(trackError).toHaveBeenCalledWith({
-        message: "Runtime connection failed",
-        source: "chat",
-        metadata: { msgId: "runtime.connection.failed" },
-      });
-
-      expect(store.dispatch).toHaveBeenCalledWith(expect.objectContaining({
-        payload: message,
-      }));
-    });
-  });
-
-  describe("handleActionMessage", () => {
-    it("should use first-person perspective for task completion messages", () => {
-      // Test partial completion
-      const messagePartial: ActionMessage = {
-        id: 1,
-        action: ActionType.FINISH,
-        source: "agent",
-        message: "",
-        timestamp: new Date().toISOString(),
-        args: {
-          final_thought: "",
-          task_completed: "partial",
-          outputs: "",
-          thought: ""
-        }
-      };
-
-      // Mock implementation to capture the message
-      let capturedPartialMessage = "";
-      (store.dispatch as any).mockImplementation((action: any) => {
-        if (action.type === "chat/addAssistantMessage" &&
-            action.payload.includes("believe that the task was **completed partially**")) {
-          capturedPartialMessage = action.payload;
-        }
-      });
-
-      handleActionMessage(messagePartial);
-      expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");
-
-      // Test not completed
-      const messageNotCompleted: ActionMessage = {
-        id: 2,
-        action: ActionType.FINISH,
-        source: "agent",
-        message: "",
-        timestamp: new Date().toISOString(),
-        args: {
-          final_thought: "",
-          task_completed: "false",
-          outputs: "",
-          thought: ""
-        }
-      };
-
-      // Mock implementation to capture the message
-      let capturedNotCompletedMessage = "";
-      (store.dispatch as any).mockImplementation((action: any) => {
-        if (action.type === "chat/addAssistantMessage" &&
-            action.payload.includes("believe that the task was **not completed**")) {
-          capturedNotCompletedMessage = action.payload;
-        }
-      });
-
-      handleActionMessage(messageNotCompleted);
-      expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");
-
-      // Test completed successfully
-      const messageCompleted: ActionMessage = {
-        id: 3,
-        action: ActionType.FINISH,
-        source: "agent",
-        message: "",
-        timestamp: new Date().toISOString(),
-        args: {
-          final_thought: "",
-          task_completed: "true",
-          outputs: "",
-          thought: ""
-        }
-      };
-
-      // Mock implementation to capture the message
-      let capturedCompletedMessage = "";
-      (store.dispatch as any).mockImplementation((action: any) => {
-        if (action.type === "chat/addAssistantMessage" &&
-            action.payload.includes("believe that the task was **completed successfully**")) {
-          capturedCompletedMessage = action.payload;
-        }
-      });
-
-      handleActionMessage(messageCompleted);
-      expect(capturedCompletedMessage).toContain("I believe that the task was **completed successfully**");
-    });
-  });
-});
--- a/frontend/tests/services/observations.test.ts
+++ b/frontend/tests/services/observations.test.ts
@@ -1,51 +0,0 @@
-import { beforeEach, describe, expect, it, vi } from "vitest";
-import { handleObservationMessage } from "#/services/observations";
-import store from "#/store";
-import { ObservationMessage } from "#/types/message";
-
-// Mock dependencies
-vi.mock("#/store", () => ({
-  default: {
-    dispatch: vi.fn(),
-  },
-}));
-
-describe("Observations Service", () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  describe("handleObservationMessage", () => {
-    const createErrorMessage = (): ObservationMessage => ({
-      id: 14,
-      timestamp: "2025-04-14T13:37:54.451843",
-      message: "The action has not been executed.",
-      cause: 12,
-      observation: "error",
-      content: "The action has not been executed.",
-      extras: {
-        error_id: "",
-        metadata: {},
-      },
-    });
-
-    it("should dispatch error messages exactly once", () => {
-      const errorMessage = createErrorMessage();
-
-      handleObservationMessage(errorMessage);
-
-      expect(store.dispatch).toHaveBeenCalledTimes(1);
-      expect(store.dispatch).toHaveBeenCalledWith({
-        type: "chat/addAssistantObservation",
-        payload: expect.objectContaining({
-          observation: "error",
-          content: "The action has not been executed.",
-          source: "user",
-          extras: {
-            error_id: "",
-          },
-        }),
-      });
-    });
-  });
-});
--- a/frontend/tests/services/observations.test.tsx
+++ b/frontend/tests/services/observations.test.tsx
@@ -1,8 +1,4 @@
-import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
-import { handleObservationMessage } from "#/services/observations";
-import { setScreenshotSrc, setUrl } from "#/state/browser-slice";
-import ObservationType from "#/types/observation-type";
-import store from "#/store";
+import { describe, it, vi, beforeEach, afterEach } from "vitest";

 // Mock the store module
 vi.mock("#/store", () => ({
@@ -20,43 +16,9 @@ describe("handleObservationMessage", () => {
    vi.resetAllMocks();
  });

-  it("updates browser state when receiving a browse observation", () => {
-    const message = {
-      id: "test-id",
-      cause: "test-cause",
-      observation: ObservationType.BROWSE,
-      content: "test content",
-      message: "test message",
-      extras: {
-        url: "https://example.com",
-        screenshot: "base64-screenshot-data",
-      },
-    };
+  it.todo("updates browser state when receiving a browse observation");

-    handleObservationMessage(message);
-
-    // Check that setScreenshotSrc and setUrl were called with the correct values
-    expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
-    expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
-  });
-
-  it("updates browser state when receiving a browse_interactive observation", () => {
-    const message = {
-      id: "test-id",
-      cause: "test-cause",
-      observation: ObservationType.BROWSE_INTERACTIVE,
-      content: "test content",
-      message: "test message",
-      extras: {
-        url: "https://example.com",
-        screenshot: "base64-screenshot-data",
-      },
-    };
-
-    handleObservationMessage(message);
-
-    // Check that setScreenshotSrc and setUrl were called with the correct values
-    expect(store.dispatch).toHaveBeenCalledWith(setScreenshotSrc("base64-screenshot-data"));
-    expect(store.dispatch).toHaveBeenCalledWith(setUrl("https://example.com"));
-  });
+  it.todo(
+    "updates browser state when receiving a browse_interactive observation",
+  );
 });
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -8,30 +8,30 @@
  },
  "dependencies": {
    "@heroui/react": "2.7.8",
-    "@microlink/react-json-view": "^1.26.1",
+    "@microlink/react-json-view": "^1.26.2",
    "@monaco-editor/react": "^4.7.0-rc.0",
-    "@react-router/node": "^7.5.3",
-    "@react-router/serve": "^7.5.3",
+    "@react-router/node": "^7.6.0",
+    "@react-router/serve": "^7.6.0",
    "@react-types/shared": "^3.29.0",
-    "@reduxjs/toolkit": "^2.7.0",
+    "@reduxjs/toolkit": "^2.8.2",
    "@stripe/react-stripe-js": "^3.7.0",
    "@stripe/stripe-js": "^7.3.0",
-    "@tanstack/react-query": "^5.75.4",
+    "@tanstack/react-query": "^5.76.1",
    "@vitejs/plugin-react": "^4.4.0",
    "@xterm/addon-fit": "^0.10.0",
    "@xterm/xterm": "^5.4.0",
    "axios": "^1.9.0",
    "clsx": "^2.1.1",
    "eslint-config-airbnb-typescript": "^18.0.0",
-    "framer-motion": "^12.10.0",
-    "i18next": "^25.1.1",
+    "framer-motion": "^12.12.1",
+    "i18next": "^25.1.3",
    "i18next-browser-languagedetector": "^8.1.0",
    "i18next-http-backend": "^3.0.2",
-    "isbot": "^5.1.27",
+    "isbot": "^5.1.28",
    "jose": "^6.0.11",
-    "lucide-react": "^0.507.0",
+    "lucide-react": "^0.511.0",
    "monaco-editor": "^0.52.2",
-    "posthog-js": "^1.239.1",
+    "posthog-js": "^1.242.2",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "react-highlight": "^0.15.0",
@@ -40,15 +40,15 @@
    "react-icons": "^5.5.0",
    "react-markdown": "^10.1.0",
    "react-redux": "^9.2.0",
-    "react-router": "^7.5.3",
+    "react-router": "^7.6.0",
    "react-syntax-highlighter": "^15.6.1",
    "react-textarea-autosize": "^8.5.9",
    "remark-gfm": "^4.0.1",
    "sirv-cli": "^3.0.1",
    "socket.io-client": "^4.8.1",
-    "tailwind-merge": "^3.2.0",
+    "tailwind-merge": "^3.3.0",
    "vite": "^6.3.5",
-    "web-vitals": "^3.5.2",
+    "web-vitals": "^5.0.1",
    "ws": "^8.18.2"
  },
  "scripts": {
@@ -83,16 +83,16 @@
    "@babel/types": "^7.27.0",
    "@mswjs/socket.io-binding": "^0.1.1",
    "@playwright/test": "^1.52.0",
-    "@react-router/dev": "^7.5.3",
+    "@react-router/dev": "^7.6.0",
    "@tailwindcss/typography": "^0.5.16",
    "@tanstack/eslint-plugin-query": "^5.74.7",
    "@testing-library/dom": "^10.4.0",
    "@testing-library/jest-dom": "^6.6.1",
    "@testing-library/react": "^16.3.0",
    "@testing-library/user-event": "^14.6.1",
-    "@types/node": "^22.15.12",
-    "@types/react": "^19.1.3",
-    "@types/react-dom": "^19.1.3",
+    "@types/node": "^22.15.18",
+    "@types/react": "^19.1.4",
+    "@types/react-dom": "^19.1.5",
    "@types/react-highlight": "^0.12.8",
    "@types/react-syntax-highlighter": "^15.5.13",
    "@types/ws": "^8.18.1",
@@ -104,7 +104,7 @@
    "eslint": "^8.57.0",
    "eslint-config-airbnb": "^19.0.4",
    "eslint-config-airbnb-typescript": "^18.0.0",
-    "eslint-config-prettier": "^10.1.3",
+    "eslint-config-prettier": "^10.1.5",
    "eslint-plugin-import": "^2.29.1",
    "eslint-plugin-jsx-a11y": "^6.10.2",
    "eslint-plugin-prettier": "^5.4.0",
@@ -113,7 +113,7 @@
    "eslint-plugin-unused-imports": "^4.1.4",
    "husky": "^9.1.7",
    "jsdom": "^26.1.0",
-    "lint-staged": "^15.5.2",
+    "lint-staged": "^16.0.0",
    "msw": "^2.6.6",
    "postcss": "^8.5.2",
    "prettier": "^3.5.3",
--- a/frontend/src/api/open-hands.ts
+++ b/frontend/src/api/open-hands.ts
@@ -76,9 +76,9 @@ class OpenHands {
  ): Promise<boolean> {
    if (appMode === "oss") return true;

-    const response =
-      await openHands.post<AuthenticateResponse>("/api/authenticate");
-    return response.status === 200;
+    // Just make the request, if it succeeds (no exception thrown), return true
+    await openHands.post<AuthenticateResponse>("/api/authenticate");
+    return true;
  }

  /**
--- a/frontend/src/components/features/chat/chat-interface.tsx
+++ b/frontend/src/components/features/chat/chat-interface.tsx
@@ -1,4 +1,4 @@
-import { useDispatch, useSelector } from "react-redux";
+import { useSelector } from "react-redux";
 import React from "react";
 import posthog from "posthog-js";
 import { useParams } from "react-router";
@@ -8,7 +8,6 @@ import { convertImageToBase64 } from "#/utils/convert-image-to-base-64";
 import { TrajectoryActions } from "../trajectory/trajectory-actions";
 import { createChatMessage } from "#/services/chat-service";
 import { InteractiveChatBox } from "./interactive-chat-box";
-import { addUserMessage } from "#/state/chat-slice";
 import { RootState } from "#/store";
 import { AgentState } from "#/types/agent-state";
 import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
@@ -25,6 +24,11 @@ import { LoadingSpinner } from "#/components/shared/loading-spinner";
 import { useGetTrajectory } from "#/hooks/mutation/use-get-trajectory";
 import { downloadTrajectory } from "#/utils/download-trajectory";
 import { displayErrorToast } from "#/utils/custom-toast-handlers";
+import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
+import { useWSErrorMessage } from "#/hooks/use-ws-error-message";
+import i18n from "#/i18n";
+import { ErrorMessageBanner } from "./error-message-banner";
+import { shouldRenderEvent } from "./event-content-helpers/should-render-event";

 function getEntryPoint(
  hasRepository: boolean | null,
@@ -36,14 +40,15 @@ function getEntryPoint(
 }

 export function ChatInterface() {
-  const { send, isLoadingMessages } = useWsClient();
-  const dispatch = useDispatch();
+  const { getErrorMessage } = useWSErrorMessage();
+  const { send, isLoadingMessages, parsedEvents } = useWsClient();
+  const { setOptimisticUserMessage, getOptimisticUserMessage } =
+    useOptimisticUserMessage();
  const { t } = useTranslation();
  const scrollRef = React.useRef<HTMLDivElement>(null);
  const { scrollDomToBottom, onChatBodyScroll, hitBottom } =
    useScrollToBottom(scrollRef);

-  const { messages } = useSelector((state: RootState) => state.chat);
  const { curAgentState } = useSelector((state: RootState) => state.agent);

  const [feedbackPolarity, setFeedbackPolarity] = React.useState<
@@ -57,8 +62,13 @@ export function ChatInterface() {
  const params = useParams();
  const { mutate: getTrajectory } = useGetTrajectory();

+  const optimisticUserMessage = getOptimisticUserMessage();
+  const errorMessage = getErrorMessage();
+
+  const events = parsedEvents.filter(shouldRenderEvent);
+
  const handleSendMessage = async (content: string, files: File[]) => {
-    if (messages.length === 0) {
+    if (events.length === 0) {
      posthog.capture("initial_query_submitted", {
        entry_point: getEntryPoint(
          selectedRepository !== null,
@@ -69,7 +79,7 @@ export function ChatInterface() {
      });
    } else {
      posthog.capture("user_message_sent", {
-        session_message_count: messages.length,
+        session_message_count: events.length,
        current_message_length: content.length,
      });
    }
@@ -77,9 +87,8 @@ export function ChatInterface() {
    const imageUrls = await Promise.all(promises);

    const timestamp = new Date().toISOString();
-    const pending = true;
-    dispatch(addUserMessage({ content, imageUrls, timestamp, pending }));
    send(createChatMessage(content, imageUrls, timestamp));
+    setOptimisticUserMessage(content);
    setMessageToSend(null);
  };

@@ -120,7 +129,7 @@ export function ChatInterface() {

  return (
    <div className="h-full flex flex-col justify-between">
-      {messages.length === 0 && (
+      {events.length === 0 && !optimisticUserMessage && (
        <ChatSuggestions onSuggestionsClick={setMessageToSend} />
      )}

@@ -137,7 +146,7 @@ export function ChatInterface() {

        {!isLoadingMessages && (
          <Messages
-            messages={messages}
+            messages={events}
            isAwaitingUserConfirmation={
              curAgentState === AgentState.AWAITING_USER_CONFIRMATION
            }
@@ -170,6 +179,12 @@ export function ChatInterface() {
          {!hitBottom && <ScrollToBottomButton onClick={scrollDomToBottom} />}
        </div>

+        {errorMessage && (
+          <ErrorMessageBanner
+            message={i18n.exists(errorMessage) ? t(errorMessage) : errorMessage}
+          />
+        )}
+
        <InteractiveChatBox
          onSubmit={handleSendMessage}
          onStop={handleStop}
--- a/frontend/src/components/features/chat/chat-message.tsx
+++ b/frontend/src/components/features/chat/chat-message.tsx
@@ -6,10 +6,11 @@ import { cn } from "#/utils/utils";
 import { ul, ol } from "../markdown/list";
 import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
 import { anchor } from "../markdown/anchor";
+import { OpenHandsSourceType } from "#/types/core/base";
 import { paragraph } from "../markdown/paragraph";

 interface ChatMessageProps {
-  type: "user" | "assistant";
+  type: OpenHandsSourceType;
  message: string;
 }

@@ -49,7 +50,7 @@ export function ChatMessage({
        "rounded-xl relative",
        "flex flex-col gap-2",
        type === "user" && " max-w-[305px] p-4 bg-tertiary self-end",
-        type === "assistant" && "mt-6 max-w-full bg-transparent",
+        type === "agent" && "mt-6 max-w-full bg-transparent",
      )}
    >
      <CopyToClipboardButton
--- a/frontend/src/components/features/chat/error-message-banner.tsx
+++ b/frontend/src/components/features/chat/error-message-banner.tsx
@@ -0,0 +1,11 @@
+interface ErrorMessageBannerProps {
+  message: string;
+}
+
+export function ErrorMessageBanner({ message }: ErrorMessageBannerProps) {
+  return (
+    <div className="w-full rounded-lg p-2 text-black border border-red-800 bg-red-500">
+      {message}
+    </div>
+  );
+}
--- a/frontend/src/components/features/chat/error-message.tsx
+++ b/frontend/src/components/features/chat/error-message.tsx
@@ -0,0 +1,56 @@
+import React from "react";
+import Markdown from "react-markdown";
+import remarkGfm from "remark-gfm";
+import { useTranslation } from "react-i18next";
+import { code } from "../markdown/code";
+import { ol, ul } from "../markdown/list";
+import ArrowDown from "#/icons/angle-down-solid.svg?react";
+import ArrowUp from "#/icons/angle-up-solid.svg?react";
+import i18n from "#/i18n";
+
+interface ErrorMessageProps {
+  errorId?: string;
+  defaultMessage: string;
+}
+
+export function ErrorMessage({ errorId, defaultMessage }: ErrorMessageProps) {
+  const { t } = useTranslation();
+  const [showDetails, setShowDetails] = React.useState(false);
+
+  const hasValidTranslationId = !!errorId && i18n.exists(errorId);
+  const errorKey = hasValidTranslationId
+    ? errorId
+    : "CHAT_INTERFACE$AGENT_ERROR_MESSAGE";
+
+  return (
+    <div className="flex flex-col gap-2 border-l-2 pl-2 my-2 py-2 border-danger text-sm w-full">
+      <div className="font-bold text-danger">
+        {t(errorKey)}
+        <button
+          type="button"
+          onClick={() => setShowDetails((prev) => !prev)}
+          className="cursor-pointer text-left"
+        >
+          {showDetails ? (
+            <ArrowUp className="h-4 w-4 ml-2 inline fill-danger" />
+          ) : (
+            <ArrowDown className="h-4 w-4 ml-2 inline fill-danger" />
+          )}
+        </button>
+      </div>
+
+      {showDetails && (
+        <Markdown
+          components={{
+            code,
+            ul,
+            ol,
+          }}
+          remarkPlugins={[remarkGfm]}
+        >
+          {defaultMessage}
+        </Markdown>
+      )}
+    </div>
+  );
+}
--- a/frontend/src/components/features/chat/event-content-helpers/get-action-content.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/get-action-content.ts
@@ -0,0 +1,125 @@
+import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
+import {
+  FileWriteAction,
+  CommandAction,
+  IPythonAction,
+  BrowseAction,
+  BrowseInteractiveAction,
+  MCPAction,
+  ThinkAction,
+  OpenHandsAction,
+  FinishAction,
+} from "#/types/core/actions";
+import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
+
+const getRiskText = (risk: ActionSecurityRisk) => {
+  switch (risk) {
+    case ActionSecurityRisk.LOW:
+      return "Low Risk";
+    case ActionSecurityRisk.MEDIUM:
+      return "Medium Risk";
+    case ActionSecurityRisk.HIGH:
+      return "High Risk";
+    case ActionSecurityRisk.UNKNOWN:
+    default:
+      return "Unknown Risk";
+  }
+};
+
+const getWriteActionContent = (event: FileWriteAction): string => {
+  let { content } = event.args;
+  if (content.length > MAX_CONTENT_LENGTH) {
+    content = `${event.args.content.slice(0, MAX_CONTENT_LENGTH)}...`;
+  }
+  return `${event.args.path}\n${content}`;
+};
+
+const getRunActionContent = (event: CommandAction): string => {
+  let content = `Command:\n\`${event.args.command}\``;
+
+  if (event.args.confirmation_state === "awaiting_confirmation") {
+    content += `\n\n${getRiskText(event.args.security_risk)}`;
+  }
+
+  return content;
+};
+
+const getIPythonActionContent = (event: IPythonAction): string => {
+  let content = `\`\`\`\n${event.args.code}\n\`\`\``;
+
+  if (event.args.confirmation_state === "awaiting_confirmation") {
+    content += `\n\n${getRiskText(event.args.security_risk)}`;
+  }
+
+  return content;
+};
+
+const getBrowseActionContent = (event: BrowseAction): string =>
+  `Browsing ${event.args.url}`;
+
+const getBrowseInteractiveActionContent = (event: BrowseInteractiveAction) =>
+  `**Action:**\n\n\`\`\`python\n${event.args.browser_actions}\n\`\`\``;
+
+const getMcpActionContent = (event: MCPAction): string => {
+  // Format MCP action with name and arguments
+  const name = event.args.name || "";
+  const args = event.args.arguments || {};
+  let details = `**MCP Tool Call:** ${name}\n\n`;
+  // Include thought if available
+  if (event.args.thought) {
+    details += `\n\n**Thought:**\n${event.args.thought}`;
+  }
+  details += `\n\n**Arguments:**\n\`\`\`json\n${JSON.stringify(args, null, 2)}\n\`\`\``;
+  return details;
+};
+
+const getThinkActionContent = (event: ThinkAction): string =>
+  event.args.thought;
+
+const getFinishActionContent = (event: FinishAction): string => {
+  let content = event.args.final_thought;
+
+  switch (event.args.task_completed) {
+    case "success":
+      content +=
+        "\n\n\nI believe that the task was **completed successfully**.";
+      break;
+    case "failure":
+      content += "\n\n\nI believe that the task was **not completed**.";
+      break;
+    case "partial":
+    default:
+      content += "\n\n\nI believe that the task was **completed partially**.";
+      break;
+  }
+
+  return content.trim();
+};
+
+const getNoContentActionContent = (): string => "";
+
+export const getActionContent = (event: OpenHandsAction): string => {
+  switch (event.action) {
+    case "read":
+    case "edit":
+      return getNoContentActionContent();
+    case "write":
+      return getWriteActionContent(event);
+    case "run":
+      return getRunActionContent(event);
+    case "run_ipython":
+      return getIPythonActionContent(event);
+    case "browse":
+      return getBrowseActionContent(event);
+    case "browse_interactive":
+      return getBrowseInteractiveActionContent(event);
+    case "call_tool_mcp":
+      return getMcpActionContent(event);
+    case "think":
+      return getThinkActionContent(event);
+    case "finish":
+      return getFinishActionContent(event);
+    default:
+      return getDefaultEventContent(event);
+  }
+};
--- a/frontend/src/components/features/chat/event-content-helpers/get-event-content.tsx
+++ b/frontend/src/components/features/chat/event-content-helpers/get-event-content.tsx
@@ -0,0 +1,70 @@
+import { Trans } from "react-i18next";
+import { OpenHandsAction } from "#/types/core/actions";
+import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
+import { OpenHandsObservation } from "#/types/core/observations";
+import { MonoComponent } from "../mono-component";
+import { PathComponent } from "../path-component";
+import { getActionContent } from "./get-action-content";
+import { getObservationContent } from "./get-observation-content";
+
+const hasPathProperty = (
+  obj: Record<string, unknown>,
+): obj is { path: string } => typeof obj.path === "string";
+
+const hasCommandProperty = (
+  obj: Record<string, unknown>,
+): obj is { command: string } => typeof obj.command === "string";
+
+const trimText = (text: string, maxLength: number): string => {
+  if (!text) return "";
+  return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
+};
+
+export const getEventContent = (
+  event: OpenHandsAction | OpenHandsObservation,
+) => {
+  let title: React.ReactNode = "";
+  let details: string = "";
+
+  if (isOpenHandsAction(event)) {
+    title = (
+      <Trans
+        i18nKey={`ACTION_MESSAGE$${event.action.toUpperCase()}`}
+        values={{
+          path: hasPathProperty(event.args) && event.args.path,
+          command:
+            hasCommandProperty(event.args) && trimText(event.args.command, 80),
+        }}
+        components={{
+          path: <PathComponent />,
+          cmd: <MonoComponent />,
+        }}
+      />
+    );
+    details = getActionContent(event);
+  }
+
+  if (isOpenHandsObservation(event)) {
+    title = (
+      <Trans
+        i18nKey={`OBSERVATION_MESSAGE$${event.observation.toUpperCase()}`}
+        values={{
+          path: hasPathProperty(event.extras) && event.extras.path,
+          command:
+            hasCommandProperty(event.extras) &&
+            trimText(event.extras.command, 80),
+        }}
+        components={{
+          path: <PathComponent />,
+          cmd: <MonoComponent />,
+        }}
+      />
+    );
+    details = getObservationContent(event);
+  }
+
+  return {
+    title: title ?? "Unknown event",
+    details: details ?? "Unknown event",
+  };
+};
--- a/frontend/src/components/features/chat/event-content-helpers/get-observation-content.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/get-observation-content.ts
@@ -0,0 +1,133 @@
+import {
+  ReadObservation,
+  CommandObservation,
+  IPythonObservation,
+  EditObservation,
+  BrowseObservation,
+  OpenHandsObservation,
+  RecallObservation,
+} from "#/types/core/observations";
+import { getObservationResult } from "./get-observation-result";
+import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
+
+const getReadObservationContent = (event: ReadObservation): string =>
+  `\`\`\`\n${event.content}\n\`\`\``;
+
+const getCommandObservationContent = (
+  event: CommandObservation | IPythonObservation,
+): string => {
+  let { content } = event;
+  if (content.length > MAX_CONTENT_LENGTH) {
+    content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
+  }
+  return `Output:\n\`\`\`sh\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
+};
+
+const getEditObservationContent = (
+  event: EditObservation,
+  successMessage: boolean,
+): string => {
+  if (successMessage) {
+    return `\`\`\`diff\n${event.extras.diff}\n\`\`\``; // Content is already truncated by the ACI
+  }
+  return event.content;
+};
+
+const getBrowseObservationContent = (event: BrowseObservation) => {
+  let contentDetails = `**URL:** ${event.extras.url}\n`;
+  if (event.extras.error) {
+    contentDetails += `\n\n**Error:**\n${event.extras.error}\n`;
+  }
+  contentDetails += `\n\n**Output:**\n${event.content}`;
+  if (contentDetails.length > MAX_CONTENT_LENGTH) {
+    contentDetails = `${contentDetails.slice(0, MAX_CONTENT_LENGTH)}...(truncated)`;
+  }
+  return contentDetails;
+};
+
+const getMcpObservationContent = (event: OpenHandsObservation): string => {
+  let { content } = event;
+  if (content.length > MAX_CONTENT_LENGTH) {
+    content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
+  }
+  return `**Output:**\n\`\`\`\n${content.trim() || "[MCP Tool finished execution with no output]"}\n\`\`\``;
+};
+
+const getRecallObservationContent = (event: RecallObservation): string => {
+  let content = "";
+
+  if (event.extras.recall_type === "workspace_context") {
+    if (event.extras.repo_name) {
+      content += `\n\n**Repository:** ${event.extras.repo_name}`;
+    }
+    if (event.extras.repo_directory) {
+      content += `\n\n**Directory:** ${event.extras.repo_directory}`;
+    }
+    if (event.extras.date) {
+      content += `\n\n**Date:** ${event.extras.date}`;
+    }
+    if (
+      event.extras.runtime_hosts &&
+      Object.keys(event.extras.runtime_hosts).length > 0
+    ) {
+      content += `\n\n**Available Hosts**`;
+      for (const [host, port] of Object.entries(event.extras.runtime_hosts)) {
+        content += `\n\n- ${host} (port ${port})`;
+      }
+    }
+    if (event.extras.repo_instructions) {
+      content += `\n\n**Repository Instructions:**\n\n${event.extras.repo_instructions}`;
+    }
+    if (event.extras.additional_agent_instructions) {
+      content += `\n\n**Additional Instructions:**\n\n${event.extras.additional_agent_instructions}`;
+    }
+  }
+
+  // Handle microagent knowledge
+  if (
+    event.extras.microagent_knowledge &&
+    event.extras.microagent_knowledge.length > 0
+  ) {
+    content += `\n\n**Triggered Microagent Knowledge:**`;
+    for (const knowledge of event.extras.microagent_knowledge) {
+      content += `\n\n- **${knowledge.name}** (triggered by keyword: ${knowledge.trigger})\n\n\`\`\`\n${knowledge.content}\n\`\`\``;
+    }
+  }
+
+  if (
+    event.extras.custom_secrets_descriptions &&
+    Object.keys(event.extras.custom_secrets_descriptions).length > 0
+  ) {
+    content += `\n\n**Custom Secrets**`;
+    for (const [name, description] of Object.entries(
+      event.extras.custom_secrets_descriptions,
+    )) {
+      content += `\n\n- $${name}: ${description}`;
+    }
+  }
+
+  return content;
+};
+
+export const getObservationContent = (event: OpenHandsObservation): string => {
+  switch (event.observation) {
+    case "read":
+      return getReadObservationContent(event);
+    case "edit":
+      return getEditObservationContent(
+        event,
+        getObservationResult(event) === "success",
+      );
+    case "run_ipython":
+    case "run":
+      return getCommandObservationContent(event);
+    case "browse":
+      return getBrowseObservationContent(event);
+    case "mcp":
+      return getMcpObservationContent(event);
+    case "recall":
+      return getRecallObservationContent(event);
+    default:
+      return getDefaultEventContent(event);
+  }
+};
--- a/frontend/src/components/features/chat/event-content-helpers/get-observation-result.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/get-observation-result.ts
@@ -0,0 +1,26 @@
+import { OpenHandsObservation } from "#/types/core/observations";
+
+export type ObservationResultStatus = "success" | "error" | "timeout";
+
+export const getObservationResult = (event: OpenHandsObservation) => {
+  const hasContent = event.content.length > 0;
+  const contentIncludesError = event.content.toLowerCase().includes("error:");
+
+  switch (event.observation) {
+    case "run": {
+      const exitCode = event.extras.metadata.exit_code;
+
+      if (exitCode === -1) return "timeout"; // Command timed out
+      if (exitCode === 0) return "success"; // Command executed successfully
+      return "error"; // Command failed
+    }
+    case "run_ipython":
+    case "read":
+    case "edit":
+    case "mcp":
+      if (!hasContent || contentIncludesError) return "error";
+      return "success"; // Content is valid
+    default:
+      return "success";
+  }
+};
--- a/frontend/src/components/features/chat/event-content-helpers/shared.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/shared.ts
@@ -0,0 +1,8 @@
+import { OpenHandsAction } from "#/types/core/actions";
+import { OpenHandsObservation } from "#/types/core/observations";
+
+export const MAX_CONTENT_LENGTH = 1000;
+
+export const getDefaultEventContent = (
+  event: OpenHandsAction | OpenHandsObservation,
+): string => `\`\`\`json\n${JSON.stringify(event, null, 2)}\n\`\`\``;
--- a/frontend/src/components/features/chat/event-content-helpers/should-render-event.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/should-render-event.ts
@@ -0,0 +1,27 @@
+import { OpenHandsAction } from "#/types/core/actions";
+import { OpenHandsEventType } from "#/types/core/base";
+import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
+import { OpenHandsObservation } from "#/types/core/observations";
+
+const COMMON_NO_RENDER_LIST: OpenHandsEventType[] = [
+  "system",
+  "agent_state_changed",
+  "change_agent_state",
+];
+
+const ACTION_NO_RENDER_LIST: OpenHandsEventType[] = ["recall"];
+
+export const shouldRenderEvent = (
+  event: OpenHandsAction | OpenHandsObservation,
+) => {
+  if (isOpenHandsAction(event)) {
+    const noRenderList = COMMON_NO_RENDER_LIST.concat(ACTION_NO_RENDER_LIST);
+    return !noRenderList.includes(event.action);
+  }
+
+  if (isOpenHandsObservation(event)) {
+    return !COMMON_NO_RENDER_LIST.includes(event.observation);
+  }
+
+  return true;
+};
--- a/frontend/src/components/features/chat/event-message.tsx
+++ b/frontend/src/components/features/chat/event-message.tsx
@@ -0,0 +1,123 @@
+import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
+import { I18nKey } from "#/i18n/declaration";
+import { OpenHandsAction } from "#/types/core/actions";
+import {
+  isUserMessage,
+  isErrorObservation,
+  isAssistantMessage,
+  isOpenHandsAction,
+  isOpenHandsObservation,
+  isFinishAction,
+  isRejectObservation,
+} from "#/types/core/guards";
+import { OpenHandsObservation } from "#/types/core/observations";
+import { ImageCarousel } from "../images/image-carousel";
+import { ChatMessage } from "./chat-message";
+import { ErrorMessage } from "./error-message";
+import { getObservationResult } from "./event-content-helpers/get-observation-result";
+import { getEventContent } from "./event-content-helpers/get-event-content";
+import { ExpandableMessage } from "./expandable-message";
+import { GenericEventMessage } from "./generic-event-message";
+
+const hasThoughtProperty = (
+  obj: Record<string, unknown>,
+): obj is { thought: string } => "thought" in obj && !!obj.thought;
+
+interface EventMessageProps {
+  event: OpenHandsAction | OpenHandsObservation;
+  hasObservationPair: boolean;
+  isFirstMessageWithResolverTrigger: boolean;
+  isAwaitingUserConfirmation: boolean;
+  isLastMessage: boolean;
+}
+
+export function EventMessage({
+  event,
+  hasObservationPair,
+  isFirstMessageWithResolverTrigger,
+  isAwaitingUserConfirmation,
+  isLastMessage,
+}: EventMessageProps) {
+  const shouldShowConfirmationButtons =
+    isLastMessage && event.source === "agent" && isAwaitingUserConfirmation;
+
+  const isFirstUserMessageWithResolverTrigger =
+    isFirstMessageWithResolverTrigger && isUserMessage(event);
+
+  // Special case: First user message with resolver trigger
+  if (isFirstUserMessageWithResolverTrigger) {
+    return (
+      <div>
+        <ExpandableMessage
+          type="action"
+          message={event.args.content}
+          id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
+        />
+        {event.args.image_urls && event.args.image_urls.length > 0 && (
+          <ImageCarousel size="small" images={event.args.image_urls} />
+        )}
+      </div>
+    );
+  }
+
+  if (isErrorObservation(event)) {
+    return (
+      <ErrorMessage
+        errorId={event.extras.error_id}
+        defaultMessage={event.message}
+      />
+    );
+  }
+
+  if (
+    hasObservationPair &&
+    isOpenHandsAction(event) &&
+    hasThoughtProperty(event.args)
+  ) {
+    return <ChatMessage type="agent" message={event.args.thought} />;
+  }
+
+  if (isFinishAction(event)) {
+    return (
+      <ChatMessage type="agent" message={getEventContent(event).details} />
+    );
+  }
+
+  if (isUserMessage(event) || isAssistantMessage(event)) {
+    return (
+      <ChatMessage
+        type={event.source}
+        message={isUserMessage(event) ? event.args.content : event.message}
+      >
+        {event.args.image_urls && event.args.image_urls.length > 0 && (
+          <ImageCarousel size="small" images={event.args.image_urls} />
+        )}
+        {shouldShowConfirmationButtons && <ConfirmationButtons />}
+      </ChatMessage>
+    );
+  }
+
+  if (isRejectObservation(event)) {
+    return <ChatMessage type="agent" message={event.content} />;
+  }
+
+  return (
+    <div>
+      {isOpenHandsAction(event) && hasThoughtProperty(event.args) && (
+        <ChatMessage type="agent" message={event.args.thought} />
+      )}
+
+      <GenericEventMessage
+        title={getEventContent(event).title}
+        details={getEventContent(event).details}
+        success={
+          isOpenHandsObservation(event)
+            ? getObservationResult(event)
+            : undefined
+        }
+      />
+
+      {shouldShowConfirmationButtons && <ConfirmationButtons />}
+    </div>
+  );
+}
--- a/frontend/src/components/features/chat/generic-event-message.tsx
+++ b/frontend/src/components/features/chat/generic-event-message.tsx
@@ -0,0 +1,61 @@
+import React from "react";
+import Markdown from "react-markdown";
+import remarkGfm from "remark-gfm";
+import { code } from "../markdown/code";
+import { ol, ul } from "../markdown/list";
+import ArrowDown from "#/icons/angle-down-solid.svg?react";
+import ArrowUp from "#/icons/angle-up-solid.svg?react";
+import { SuccessIndicator } from "./success-indicator";
+import { ObservationResultStatus } from "./event-content-helpers/get-observation-result";
+
+interface GenericEventMessageProps {
+  title: React.ReactNode;
+  details: string;
+  success?: ObservationResultStatus;
+}
+
+export function GenericEventMessage({
+  title,
+  details,
+  success,
+}: GenericEventMessageProps) {
+  const [showDetails, setShowDetails] = React.useState(false);
+
+  return (
+    <div className="flex flex-col gap-2 border-l-2 pl-2 my-2 py-2 border-neutral-300 text-sm w-full">
+      <div className="flex items-center justify-between font-bold text-neutral-300">
+        <div>
+          {title}
+          {details && (
+            <button
+              type="button"
+              onClick={() => setShowDetails((prev) => !prev)}
+              className="cursor-pointer text-left"
+            >
+              {showDetails ? (
+                <ArrowUp className="h-4 w-4 ml-2 inline fill-neutral-300" />
+              ) : (
+                <ArrowDown className="h-4 w-4 ml-2 inline fill-neutral-300" />
+              )}
+            </button>
+          )}
+        </div>
+
+        {success && <SuccessIndicator status={success} />}
+      </div>
+
+      {showDetails && (
+        <Markdown
+          components={{
+            code,
+            ul,
+            ol,
+          }}
+          remarkPlugins={[remarkGfm]}
+        >
+          {details}
+        </Markdown>
+      )}
+    </div>
+  );
+}
--- a/frontend/src/components/features/chat/messages.tsx
+++ b/frontend/src/components/features/chat/messages.tsx
@@ -1,80 +1,82 @@
 import React from "react";
-import type { Message } from "#/message";
-import { ChatMessage } from "#/components/features/chat/chat-message";
-import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
-import { ImageCarousel } from "../images/image-carousel";
-import { ExpandableMessage } from "./expandable-message";
 import { useUserConversation } from "#/hooks/query/use-user-conversation";
 import { useConversation } from "#/context/conversation-context";
-import { I18nKey } from "#/i18n/declaration";
+import { OpenHandsAction } from "#/types/core/actions";
+import { OpenHandsObservation } from "#/types/core/observations";
+import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
+import { OpenHandsEventType } from "#/types/core/base";
+import { EventMessage } from "./event-message";
+import { ChatMessage } from "./chat-message";
+import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
+
+const COMMON_NO_RENDER_LIST: OpenHandsEventType[] = [
+  "system",
+  "agent_state_changed",
+  "change_agent_state",
+];
+
+const ACTION_NO_RENDER_LIST: OpenHandsEventType[] = ["recall"];
+
+const shouldRenderEvent = (event: OpenHandsAction | OpenHandsObservation) => {
+  if (isOpenHandsAction(event)) {
+    const noRenderList = COMMON_NO_RENDER_LIST.concat(ACTION_NO_RENDER_LIST);
+    return !noRenderList.includes(event.action);
+  }
+
+  if (isOpenHandsObservation(event)) {
+    return !COMMON_NO_RENDER_LIST.includes(event.observation);
+  }
+
+  return true;
+};

 interface MessagesProps {
-  messages: Message[];
+  messages: (OpenHandsAction | OpenHandsObservation)[];
  isAwaitingUserConfirmation: boolean;
 }

 export const Messages: React.FC<MessagesProps> = React.memo(
  ({ messages, isAwaitingUserConfirmation }) => {
+    const { getOptimisticUserMessage } = useOptimisticUserMessage();
    const { conversationId } = useConversation();
    const { data: conversation } = useUserConversation(conversationId || null);

+    const optimisticUserMessage = getOptimisticUserMessage();
+
    // Check if conversation metadata has trigger=resolver
    const isResolverTrigger = conversation?.trigger === "resolver";

-    return messages.map((message, index) => {
-      const shouldShowConfirmationButtons =
-        messages.length - 1 === index &&
-        message.sender === "assistant" &&
-        isAwaitingUserConfirmation;
+    const actionHasObservationPair = React.useCallback(
+      (event: OpenHandsAction | OpenHandsObservation): boolean => {
+        if (isOpenHandsAction(event)) {
+          return !!messages.some(
+            (msg) => isOpenHandsObservation(msg) && msg.cause === event.id,
+          );
+        }

-      const isFirstUserMessageWithResolverTrigger =
-        index === 0 && message.sender === "user" && isResolverTrigger;
+        return false;
+      },
+      [messages],
+    );

-      // Special case: First user message with resolver trigger
-      if (isFirstUserMessageWithResolverTrigger) {
-        return (
-          <div key={index}>
-            <ExpandableMessage
-              type="action"
-              message={message.content}
-              id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
-            />
-            {message.imageUrls && message.imageUrls.length > 0 && (
-              <ImageCarousel size="small" images={message.imageUrls} />
-            )}
-          </div>
-        );
-      }
+    return (
+      <>
+        {messages.filter(shouldRenderEvent).map((message, index) => (
+          <EventMessage
+            key={index}
+            event={message}
+            hasObservationPair={actionHasObservationPair(message)}
+            isFirstMessageWithResolverTrigger={index === 0 && isResolverTrigger}
+            isAwaitingUserConfirmation={isAwaitingUserConfirmation}
+            isLastMessage={messages.length - 1 === index}
+          />
+        ))}

-      if (message.type === "error" || message.type === "action") {
-        return (
-          <div key={index}>
-            <ExpandableMessage
-              type={message.type}
-              id={message.translationID}
-              message={message.content}
-              success={message.success}
-              observation={message.observation}
-              action={message.action}
-            />
-            {shouldShowConfirmationButtons && <ConfirmationButtons />}
-          </div>
-        );
-      }
-
-      return (
-        <ChatMessage
-          key={index}
-          type={message.sender}
-          message={message.content}
-        >
-          {message.imageUrls && message.imageUrls.length > 0 && (
-            <ImageCarousel size="small" images={message.imageUrls} />
-          )}
-          {shouldShowConfirmationButtons && <ConfirmationButtons />}
-        </ChatMessage>
-      );
-    });
+        {optimisticUserMessage && (
+          <ChatMessage type="user" message={optimisticUserMessage} />
+        )}
+      </>
+    );
  },
 );

--- a/frontend/src/components/features/chat/success-indicator.tsx
+++ b/frontend/src/components/features/chat/success-indicator.tsx
@@ -0,0 +1,35 @@
+import { FaClock } from "react-icons/fa";
+import CheckCircle from "#/icons/check-circle-solid.svg?react";
+import XCircle from "#/icons/x-circle-solid.svg?react";
+import { ObservationResultStatus } from "./event-content-helpers/get-observation-result";
+
+interface SuccessIndicatorProps {
+  status: ObservationResultStatus;
+}
+
+export function SuccessIndicator({ status }: SuccessIndicatorProps) {
+  return (
+    <span className="flex-shrink-0">
+      {status === "success" && (
+        <CheckCircle
+          data-testid="status-icon"
+          className="h-4 w-4 ml-2 inline fill-success"
+        />
+      )}
+
+      {status === "error" && (
+        <XCircle
+          data-testid="status-icon"
+          className="h-4 w-4 ml-2 inline fill-danger"
+        />
+      )}
+
+      {status === "timeout" && (
+        <FaClock
+          data-testid="status-icon"
+          className="h-4 w-4 ml-2 inline fill-yellow-500"
+        />
+      )}
+    </span>
+  );
+}
--- a/frontend/src/components/features/context-menu/account-settings-context-menu.tsx
+++ b/frontend/src/components/features/context-menu/account-settings-context-menu.tsx
@@ -7,13 +7,11 @@ import { I18nKey } from "#/i18n/declaration";
 interface AccountSettingsContextMenuProps {
  onLogout: () => void;
  onClose: () => void;
-  isLoggedIn: boolean;
 }

 export function AccountSettingsContextMenu({
  onLogout,
  onClose,
-  isLoggedIn,
 }: AccountSettingsContextMenuProps) {
  const ref = useClickOutsideElement<HTMLUListElement>(onClose);
  const { t } = useTranslation();
@@ -24,7 +22,7 @@ export function AccountSettingsContextMenu({
      ref={ref}
      className="absolute right-full md:left-full -top-1 z-10 w-fit"
    >
-      <ContextMenuListItem onClick={onLogout} isDisabled={!isLoggedIn}>
+      <ContextMenuListItem onClick={onLogout}>
        {t(I18nKey.ACCOUNT_SETTINGS$LOGOUT)}
      </ContextMenuListItem>
    </ContextMenu>
--- a/frontend/src/components/features/conversation-panel/conversation-card.tsx
+++ b/frontend/src/components/features/conversation-panel/conversation-card.tsx
@@ -15,8 +15,9 @@ import { cn } from "#/utils/utils";
 import { BaseModal } from "../../shared/modals/base-modal/base-modal";
 import { RootState } from "#/store";
 import { I18nKey } from "#/i18n/declaration";
-import { selectSystemMessage } from "#/state/chat-slice";
 import { transformVSCodeUrl } from "#/utils/vscode-url-helper";
+import { useWsClient } from "#/context/ws-client-provider";
+import { isSystemMessage } from "#/types/core/guards";

 interface ConversationCardProps {
  onClick?: () => void;
@@ -52,15 +53,17 @@ export function ConversationCard({
  conversationId,
 }: ConversationCardProps) {
  const { t } = useTranslation();
+  const { parsedEvents } = useWsClient();
  const [contextMenuVisible, setContextMenuVisible] = React.useState(false);
  const [titleMode, setTitleMode] = React.useState<"view" | "edit">("view");
  const [metricsModalVisible, setMetricsModalVisible] = React.useState(false);
  const [systemModalVisible, setSystemModalVisible] = React.useState(false);
  const inputRef = React.useRef<HTMLInputElement>(null);

+  const systemMessage = parsedEvents.find(isSystemMessage);
+
  // Subscribe to metrics data from Redux store
  const metrics = useSelector((state: RootState) => state.metrics);
-  const systemMessage = useSelector(selectSystemMessage);

  const handleBlur = () => {
    if (inputRef.current?.value) {
@@ -365,7 +368,7 @@ export function ConversationCard({
      <SystemMessageModal
        isOpen={systemModalVisible}
        onClose={() => setSystemModalVisible(false)}
-        systemMessage={systemMessage}
+        systemMessage={systemMessage ? systemMessage.args : null}
      />
    </>
  );
--- a/frontend/src/components/features/home/tasks/task-card.tsx
+++ b/frontend/src/components/features/home/tasks/task-card.tsx
@@ -6,6 +6,7 @@ import { cn } from "#/utils/utils";
 import { useUserRepositories } from "#/hooks/query/use-user-repositories";
 import { TaskIssueNumber } from "./task-issue-number";
 import { Provider } from "#/types/settings";
+import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";

 const getTaskTypeMap = (
  t: (key: string) => string,
@@ -21,6 +22,7 @@ interface TaskCardProps {
 }

 export function TaskCard({ task }: TaskCardProps) {
+  const { setOptimisticUserMessage } = useOptimisticUserMessage();
  const { data: repositories } = useUserRepositories();
  const { mutate: createConversation, isPending } = useCreateConversation();
  const isCreatingConversation = useIsCreatingConversation();
@@ -38,6 +40,7 @@ export function TaskCard({ task }: TaskCardProps) {

  const handleLaunchConversation = () => {
    const repo = getRepo(task.repo, task.git_provider);
+    setOptimisticUserMessage("Addressing task...");

    return createConversation({
      selectedRepository: repo,
--- a/frontend/src/components/features/jupyter/jupyter-cell-output.tsx
+++ b/frontend/src/components/features/jupyter/jupyter-cell-output.tsx
@@ -24,6 +24,10 @@ export function JupyterCellOutput({ lines }: JupyterCellOutputProps) {
        {/* display the lines as plaintext or image */}
        {lines.map((line, index) => {
          if (line.type === "image") {
+            // Use markdown to display the image
+            const imageMarkdown = line.url
+              ? `![image](${line.url})`
+              : line.content;
            return (
              <div key={index}>
                <Markdown
@@ -32,7 +36,7 @@ export function JupyterCellOutput({ lines }: JupyterCellOutputProps) {
                  }}
                  urlTransform={(value: string) => value}
                >
-                  {line.content}
+                  {imageMarkdown}
                </Markdown>
              </div>
            );
--- a/frontend/src/components/features/jupyter/jupyter-cell.tsx
+++ b/frontend/src/components/features/jupyter/jupyter-cell.tsx
@@ -12,8 +12,8 @@ export function JupyterCell({ cell }: JupyterCellProps) {
  const [lines, setLines] = React.useState<JupyterLine[]>([]);

  React.useEffect(() => {
-    setLines(parseCellContent(cell.content));
-  }, [cell.content]);
+    setLines(parseCellContent(cell.content, cell.imageUrls));
+  }, [cell.content, cell.imageUrls]);

  if (cell.type === "input") {
    return <JupytrerCellInput code={cell.content} />;
--- a/frontend/src/components/features/sidebar/user-actions.tsx
+++ b/frontend/src/components/features/sidebar/user-actions.tsx
@@ -35,7 +35,6 @@ export function UserActions({ onLogout, user, isLoading }: UserActionsProps) {

      {accountContextMenuIsVisible && (
        <AccountSettingsContextMenu
-          isLoggedIn={!!user}
          onLogout={handleLogout}
          onClose={closeAccountMenu}
        />
--- a/frontend/src/context/ws-client-provider.tsx
+++ b/frontend/src/context/ws-client-provider.tsx
@@ -3,7 +3,7 @@ import { io, Socket } from "socket.io-client";
 import { useQueryClient } from "@tanstack/react-query";
 import EventLogger from "#/utils/event-logger";
 import { handleAssistantMessage } from "#/services/actions";
-import { showChatError } from "#/utils/error-handler";
+import { showChatError, trackError } from "#/utils/error-handler";
 import { useRate } from "#/hooks/use-rate";
 import { OpenHandsParsedEvent } from "#/types/core";
 import {
@@ -11,10 +11,26 @@ import {
  CommandAction,
  FileEditAction,
  FileWriteAction,
+  OpenHandsAction,
  UserMessageAction,
 } from "#/types/core/actions";
 import { Conversation } from "#/api/open-hands.types";
 import { useUserProviders } from "#/hooks/use-user-providers";
+import { OpenHandsObservation } from "#/types/core/observations";
+import {
+  isErrorObservation,
+  isOpenHandsAction,
+  isOpenHandsObservation,
+  isUserMessage,
+} from "#/types/core/guards";
+import { useOptimisticUserMessage } from "#/hooks/use-optimistic-user-message";
+import { useWSErrorMessage } from "#/hooks/use-ws-error-message";
+
+const hasValidMessageProperty = (obj: unknown): obj is { message: string } =>
+  typeof obj === "object" &&
+  obj !== null &&
+  "message" in obj &&
+  typeof obj.message === "string";

 const isOpenHandsEvent = (event: unknown): event is OpenHandsParsedEvent =>
  typeof event === "object" &&
@@ -35,14 +51,6 @@ const isFileEditAction = (
 const isCommandAction = (event: OpenHandsParsedEvent): event is CommandAction =>
  "action" in event && event.action === "run";

-const isUserMessage = (
-  event: OpenHandsParsedEvent,
-): event is UserMessageAction =>
-  "source" in event &&
-  "type" in event &&
-  event.source === "user" &&
-  event.type === "message";
-
 const isAssistantMessage = (
  event: OpenHandsParsedEvent,
 ): event is AssistantMessageAction =>
@@ -65,6 +73,7 @@ interface UseWsClient {
  status: WsClientProviderStatus;
  isLoadingMessages: boolean;
  events: Record<string, unknown>[];
+  parsedEvents: (OpenHandsAction | OpenHandsObservation)[];
  send: (event: Record<string, unknown>) => void;
 }

@@ -72,6 +81,7 @@ const WsClientContext = React.createContext<UseWsClient>({
  status: WsClientProviderStatus.DISCONNECTED,
  isLoadingMessages: true,
  events: [],
+  parsedEvents: [],
  send: () => {
    throw new Error("not connected");
  },
@@ -121,12 +131,17 @@ export function WsClientProvider({
  conversationId,
  children,
 }: React.PropsWithChildren<WsClientProviderProps>) {
+  const { removeOptimisticUserMessage } = useOptimisticUserMessage();
+  const { setErrorMessage, removeErrorMessage } = useWSErrorMessage();
  const queryClient = useQueryClient();
  const sioRef = React.useRef<Socket | null>(null);
  const [status, setStatus] = React.useState(
    WsClientProviderStatus.DISCONNECTED,
  );
  const [events, setEvents] = React.useState<Record<string, unknown>[]>([]);
+  const [parsedEvents, setParsedEvents] = React.useState<
+    (OpenHandsAction | OpenHandsObservation)[]
+  >([]);
  const lastEventRef = React.useRef<Record<string, unknown> | null>(null);
  const { providers } = useUserProviders();

@@ -146,6 +161,24 @@ export function WsClientProvider({

  function handleMessage(event: Record<string, unknown>) {
    if (isOpenHandsEvent(event)) {
+      if (isOpenHandsAction(event) || isOpenHandsObservation(event)) {
+        setParsedEvents((prevEvents) => [...prevEvents, event]);
+      }
+
+      if (isErrorObservation(event)) {
+        trackError({
+          message: event.message,
+          source: "chat",
+          metadata: { msgId: event.id },
+        });
+      } else {
+        removeErrorMessage();
+      }
+
+      if (isUserMessage(event)) {
+        removeOptimisticUserMessage();
+      }
+
      if (isMessageAction(event)) {
        messageRateHandler.record(new Date().getTime());
      }
@@ -156,7 +189,7 @@ export function WsClientProvider({
        isFileWriteAction(event) ||
        isCommandAction(event)
      ) {
-        queryClient.invalidateQueries({
+        queryClient.removeQueries({
          queryKey: ["file_changes", conversationId],
        });

@@ -202,11 +235,23 @@ export function WsClientProvider({
    sio.io.opts.query = sio.io.opts.query || {};
    sio.io.opts.query.latest_event_id = lastEventRef.current?.id;
    updateStatusWhenErrorMessagePresent(data);
+
+    setErrorMessage(
+      hasValidMessageProperty(data)
+        ? data.message
+        : "The WebSocket connection was closed.",
+    );
  }

  function handleError(data: unknown) {
    setStatus(WsClientProviderStatus.DISCONNECTED);
    updateStatusWhenErrorMessagePresent(data);
+
+    setErrorMessage(
+      hasValidMessageProperty(data)
+        ? data.message
+        : "An unknown error occurred on the WebSocket connection.",
+    );
  }

  React.useEffect(() => {
@@ -267,9 +312,10 @@ export function WsClientProvider({
      status,
      isLoadingMessages: messageRateHandler.isUnderThreshold,
      events,
+      parsedEvents,
      send,
    }),
-    [status, messageRateHandler.isUnderThreshold, events],
+    [status, messageRateHandler.isUnderThreshold, events, parsedEvents],
  );

  return <WsClientContext value={value}>{children}</WsClientContext>;
--- a/frontend/src/hooks/query/use-is-authed.ts
+++ b/frontend/src/hooks/query/use-is-authed.ts
@@ -1,4 +1,5 @@
 import { useQuery } from "@tanstack/react-query";
+import axios, { AxiosError } from "axios";
 import OpenHands from "#/api/open-hands";
 import { useConfig } from "./use-config";
 import { useIsOnTosPage } from "#/hooks/use-is-on-tos-page";
@@ -11,7 +12,23 @@ export const useIsAuthed = () => {

  return useQuery({
    queryKey: ["user", "authenticated", appMode],
-    queryFn: () => OpenHands.authenticate(appMode!),
+    queryFn: async () => {
+      try {
+        // If in OSS mode or authentication succeeds, return true
+        await OpenHands.authenticate(appMode!);
+        return true;
+      } catch (error) {
+        // If it's a 401 error, return false (not authenticated)
+        if (axios.isAxiosError(error)) {
+          const axiosError = error as AxiosError;
+          if (axiosError.response?.status === 401) {
+            return false;
+          }
+        }
+        // For any other error, throw it to put the query in error state
+        throw error;
+      }
+    },
    enabled: !!appMode && !isOnTosPage,
    staleTime: 1000 * 60 * 5, // 5 minutes
    gcTime: 1000 * 60 * 15, // 15 minutes
--- a/frontend/src/hooks/use-handle-ws-events.ts
+++ b/frontend/src/hooks/use-handle-ws-events.ts
@@ -1,10 +1,7 @@
 import React from "react";
-import { useDispatch } from "react-redux";
 import { useWsClient } from "#/context/ws-client-provider";
 import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
-import { addErrorMessage } from "#/state/chat-slice";
 import { AgentState } from "#/types/agent-state";
-import { ErrorObservation } from "#/types/core/observations";
 import { displayErrorToast } from "#/utils/custom-toast-handlers";

 interface ServerError {
@@ -15,12 +12,8 @@ interface ServerError {

 const isServerError = (data: object): data is ServerError => "error" in data;

-const isErrorObservation = (data: object): data is ErrorObservation =>
-  "observation" in data && data.observation === "error";
-
 export const useHandleWSEvents = () => {
  const { events, send } = useWsClient();
-  const dispatch = useDispatch();

  React.useEffect(() => {
    if (!events.length) {
@@ -49,14 +42,5 @@ export const useHandleWSEvents = () => {
        send(generateAgentStateChangeEvent(AgentState.PAUSED));
      }
    }
-
-    if (isErrorObservation(event)) {
-      dispatch(
-        addErrorMessage({
-          id: event.extras?.error_id,
-          message: event.message,
-        }),
-      );
-    }
  }, [events.length]);
 };
--- a/frontend/src/hooks/use-optimistic-user-message.ts
+++ b/frontend/src/hooks/use-optimistic-user-message.ts
@@ -0,0 +1,23 @@
+import { useQueryClient } from "@tanstack/react-query";
+
+export const useOptimisticUserMessage = () => {
+  const queryKey = ["optimistic_user_message"] as const;
+  const queryClient = useQueryClient();
+
+  const setOptimisticUserMessage = (message: string) => {
+    queryClient.setQueryData<string>(queryKey, message);
+  };
+
+  const getOptimisticUserMessage = () =>
+    queryClient.getQueryData<string>(queryKey);
+
+  const removeOptimisticUserMessage = () => {
+    queryClient.removeQueries({ queryKey });
+  };
+
+  return {
+    setOptimisticUserMessage,
+    getOptimisticUserMessage,
+    removeOptimisticUserMessage,
+  };
+};
--- a/frontend/src/hooks/use-ws-error-message.ts
+++ b/frontend/src/hooks/use-ws-error-message.ts
@@ -0,0 +1,22 @@
+import { useQueryClient } from "@tanstack/react-query";
+
+export const useWSErrorMessage = () => {
+  const queryClient = useQueryClient();
+
+  const setErrorMessage = (message: string) => {
+    queryClient.setQueryData<string>(["error_message"], message);
+  };
+
+  const getErrorMessage = () =>
+    queryClient.getQueryData<string>(["error_message"]);
+
+  const removeErrorMessage = () => {
+    queryClient.removeQueries({ queryKey: ["error_message"] });
+  };
+
+  return {
+    setErrorMessage,
+    getErrorMessage,
+    removeErrorMessage,
+  };
+};
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -12,7 +12,8 @@
        "ar": "قيمة السر مطلوبة",
        "fr": "La valeur du secret est requise",
        "tr": "Gizli değer gereklidir",
-        "de": "Geheimer Wert ist erforderlich"
+        "de": "Geheimer Wert ist erforderlich",
+        "uk": "Значення секрету є обов'язковим"
    },
    "SECRETS$ADD_SECRET": {
        "en": "Add secret",
@@ -27,7 +28,8 @@
        "ar": "إضافة سر",
        "fr": "Ajouter un secret",
        "tr": "Gizli ekle",
-        "de": "Geheimnis hinzufügen"
+        "de": "Geheimnis hinzufügen",
+        "uk": "Додати секрет"
    },
    "SECRETS$EDIT_SECRET": {
        "en": "Edit secret",
@@ -42,7 +44,8 @@
        "ar": "تعديل السر",
        "fr": "Modifier le secret",
        "tr": "Gizliyi düzenle",
-        "de": "Geheimnis bearbeiten"
+        "de": "Geheimnis bearbeiten",
+        "uk": "Редагувати секрет"
    },
    "SECRETS$NO_SECRETS_FOUND": {
        "en": "No secrets found",
@@ -57,7 +60,8 @@
        "ar": "لم يتم العثور على أسرار",
        "fr": "Aucun secret trouvé",
        "tr": "Gizli bulunamadı",
-        "de": "Keine Geheimnisse gefunden"
+        "de": "Keine Geheimnisse gefunden",
+        "uk": "Секретів не знайдено"
    },
    "SECRETS$ADD_NEW_SECRET": {
        "en": "Add a new secret",
@@ -72,7 +76,8 @@
        "ar": "إضافة سر جديد",
        "fr": "Ajouter un nouveau secret",
        "tr": "Yeni bir gizli ekle",
-        "de": "Neues Geheimnis hinzufügen"
+        "de": "Neues Geheimnis hinzufügen",
+        "uk": "Додати новий секрет"
    },
    "SECRETS$CONFIRM_DELETE_KEY": {
        "en": "Are you sure you want to delete this key?",
@@ -87,7 +92,8 @@
        "ar": "هل أنت متأكد أنك تريد حذف هذا المفتاح؟",
        "fr": "Êtes-vous sûr de vouloir supprimer cette clé ?",
        "tr": "Bu anahtarı silmek istediğinizden emin misiniz?",
-        "de": "Sind Sie sicher, dass Sie diesen Schlüssel löschen möchten?"
+        "de": "Sind Sie sicher, dass Sie diesen Schlüssel löschen möchten?",
+        "uk": "Ви впевнені, що хочете видалити цей ключ?"
    },
    "SETTINGS$MCP_TITLE": {
        "en": "Model Context Protocol (MCP)",
@@ -1062,7 +1068,8 @@
        "ar": "أسرار",
        "fr": "Secrets",
        "tr": "Sırları",
-        "de": "Geheimnisse"
+        "de": "Geheimnisse",
+        "uk": "Секрети"
    },
    "SETTINGS$NAV_API_KEYS": {
        "en": "API Keys",
@@ -6377,20 +6384,20 @@
        "uk": "Завантажити файл"
    },
    "ACTION_MESSAGE$RUN": {
-        "en": "Running <cmd>{{action.payload.args.command}}</cmd>",
-        "zh-CN": "运行 <cmd>{{action.payload.args.command}}</cmd>",
-        "zh-TW": "執行 <cmd>{{action.payload.args.command}}</cmd>",
-        "ko-KR": "실행 <cmd>{{action.payload.args.command}}</cmd>",
-        "ja": "実行 <cmd>{{action.payload.args.command}}</cmd>",
-        "no": "Kjører <cmd>{{action.payload.args.command}}</cmd>",
-        "ar": "تشغيل <cmd>{{action.payload.args.command}}</cmd>",
-        "de": "Führt <cmd>{{action.payload.args.command}}</cmd> aus",
-        "fr": "Exécution de <cmd>{{action.payload.args.command}}</cmd>",
-        "it": "Esecuzione di <cmd>{{action.payload.args.command}}</cmd>",
-        "pt": "Executando <cmd>{{action.payload.args.command}}</cmd>",
-        "es": "Ejecutando <cmd>{{action.payload.args.command}}</cmd>",
-        "tr": "<cmd>{{action.payload.args.command}}</cmd> çalıştırılıyor",
-        "uk": "Виконую <cmd>{{action.payload.args.command}}</cmd>"
+        "en": "Running <cmd>{{command}}</cmd>",
+        "zh-CN": "运行 <cmd>{{command}}</cmd>",
+        "zh-TW": "執行 <cmd>{{command}}</cmd>",
+        "ko-KR": "실행 <cmd>{{command}}</cmd>",
+        "ja": "実行 <cmd>{{command}}</cmd>",
+        "no": "Kjører <cmd>{{command}}</cmd>",
+        "ar": "تشغيل <cmd>{{command}}</cmd>",
+        "de": "Führt <cmd>{{command}}</cmd> aus",
+        "fr": "Exécution de <cmd>{{command}}</cmd>",
+        "it": "Esecuzione di <cmd>{{command}}</cmd>",
+        "pt": "Executando <cmd>{{command}}</cmd>",
+        "es": "Ejecutando <cmd>{{command}}</cmd>",
+        "tr": "<cmd>{{command}}</cmd> çalıştırılıyor",
+        "uk": "Виконую <cmd>{{command}}</cmd>"
    },
    "ACTION_MESSAGE$RUN_IPYTHON": {
        "en": "Running a Python command",
@@ -6425,52 +6432,52 @@
        "uk": "Викликаю інструмент MCP: {{action.payload.args.name}}"
    },
    "ACTION_MESSAGE$READ": {
-        "en": "Reading <path>{{action.payload.args.path}}</path>",
-        "zh-CN": "读取 <path>{{action.payload.args.path}}</path>",
-        "zh-TW": "讀取 <path>{{action.payload.args.path}}</path>",
-        "ko-KR": "읽기 <path>{{action.payload.args.path}}</path>",
-        "ja": "読み取り <path>{{action.payload.args.path}}</path>",
-        "no": "Leser <path>{{action.payload.args.path}}</path>",
-        "ar": "قراءة <path>{{action.payload.args.path}}</path>",
-        "de": "Liest <path>{{action.payload.args.path}}</path>",
-        "fr": "Lecture de <path>{{action.payload.args.path}}</path>",
-        "it": "Lettura di <path>{{action.payload.args.path}}</path>",
-        "pt": "Lendo <path>{{action.payload.args.path}}</path>",
-        "es": "Leyendo <path>{{action.payload.args.path}}</path>",
-        "tr": "<path>{{action.payload.args.path}}</path> okunuyor",
-        "uk": "Читаю <path>{{action.payload.args.path}}</path>"
+        "en": "Reading <path>{{path}}</path>",
+        "zh-CN": "读取 <path>{{path}}</path>",
+        "zh-TW": "讀取 <path>{{path}}</path>",
+        "ko-KR": "읽기 <path>{{path}}</path>",
+        "ja": "読み取り <path>{{path}}</path>",
+        "no": "Leser <path>{{path}}</path>",
+        "ar": "قراءة <path>{{path}}</path>",
+        "de": "Liest <path>{{path}}</path>",
+        "fr": "Lecture de <path>{{path}}</path>",
+        "it": "Lettura di <path>{{path}}</path>",
+        "pt": "Lendo <path>{{path}}</path>",
+        "es": "Leyendo <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> okunuyor",
+        "uk": "Читаю <path>{{path}}</path>"
    },
    "ACTION_MESSAGE$EDIT": {
-        "en": "Editing <path>{{action.payload.args.path}}</path>",
-        "zh-CN": "编辑 <path>{{action.payload.args.path}}</path>",
-        "zh-TW": "編輯 <path>{{action.payload.args.path}}</path>",
-        "ko-KR": "편집 <path>{{action.payload.args.path}}</path>",
-        "ja": "編集 <path>{{action.payload.args.path}}</path>",
-        "no": "Redigerer <path>{{action.payload.args.path}}</path>",
-        "ar": "تحرير <path>{{action.payload.args.path}}</path>",
-        "de": "Bearbeitet <path>{{action.payload.args.path}}</path>",
-        "fr": "Modification de <path>{{action.payload.args.path}}</path>",
-        "it": "Modifica di <path>{{action.payload.args.path}}</path>",
-        "pt": "Editando <path>{{action.payload.args.path}}</path>",
-        "es": "Editando <path>{{action.payload.args.path}}</path>",
-        "tr": "<path>{{action.payload.args.path}}</path> düzenleniyor",
-        "uk": "Редагую <path>{{action.payload.args.path}}</path>"
+        "en": "Editing <path>{{path}}</path>",
+        "zh-CN": "编辑 <path>{{path}}</path>",
+        "zh-TW": "編輯 <path>{{path}}</path>",
+        "ko-KR": "편집 <path>{{path}}</path>",
+        "ja": "編集 <path>{{path}}</path>",
+        "no": "Redigerer <path>{{path}}</path>",
+        "ar": "تحرير <path>{{path}}</path>",
+        "de": "Bearbeitet <path>{{path}}</path>",
+        "fr": "Modification de <path>{{path}}</path>",
+        "it": "Modifica di <path>{{path}}</path>",
+        "pt": "Editando <path>{{path}}</path>",
+        "es": "Editando <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> düzenleniyor",
+        "uk": "Редагую <path>{{path}}</path>"
    },
    "ACTION_MESSAGE$WRITE": {
-        "en": "Writing to <path>{{action.payload.args.path}}</path>",
-        "zh-CN": "写入 <path>{{action.payload.args.path}}</path>",
-        "zh-TW": "寫入 <path>{{action.payload.args.path}}</path>",
-        "ko-KR": "쓰기 <path>{{action.payload.args.path}}</path>",
-        "ja": "書き込み <path>{{action.payload.args.path}}</path>",
-        "no": "Skriver til <path>{{action.payload.args.path}}</path>",
-        "ar": "الكتابة إلى <path>{{action.payload.args.path}}</path>",
-        "de": "Schreibt in <path>{{action.payload.args.path}}</path>",
-        "fr": "Écriture dans <path>{{action.payload.args.path}}</path>",
-        "it": "Scrittura su <path>{{action.payload.args.path}}</path>",
-        "pt": "Escrevendo em <path>{{action.payload.args.path}}</path>",
-        "es": "Escribiendo en <path>{{action.payload.args.path}}</path>",
-        "tr": "<path>{{action.payload.args.path}}</path> dosyasına yazılıyor",
-        "uk": "Записую в <path>{{action.payload.args.path}}</path>"
+        "en": "Writing to <path>{{path}}</path>",
+        "zh-CN": "写入 <path>{{path}}</path>",
+        "zh-TW": "寫入 <path>{{path}}</path>",
+        "ko-KR": "쓰기 <path>{{path}}</path>",
+        "ja": "書き込み <path>{{path}}</path>",
+        "no": "Skriver til <path>{{path}}</path>",
+        "ar": "الكتابة إلى <path>{{path}}</path>",
+        "de": "Schreibt in <path>{{path}}</path>",
+        "fr": "Écriture dans <path>{{path}}</path>",
+        "it": "Scrittura su <path>{{path}}</path>",
+        "pt": "Escrevendo em <path>{{path}}</path>",
+        "es": "Escribiendo en <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> dosyasına yazılıyor",
+        "uk": "Записую в <path>{{path}}</path>"
    },
    "ACTION_MESSAGE$BROWSE": {
        "en": "Browsing the web",
@@ -6537,20 +6544,20 @@
        "uk": "Системне повідомлення"
    },
    "OBSERVATION_MESSAGE$RUN": {
-        "en": "Ran <cmd>{{observation.payload.extras.command}}</cmd>",
-        "zh-CN": "运行 <cmd>{{observation.payload.extras.command}}</cmd>",
-        "zh-TW": "執行 <cmd>{{observation.payload.extras.command}}</cmd>",
-        "ko-KR": "실행 <cmd>{{observation.payload.extras.command}}</cmd>",
-        "ja": "実行 <cmd>{{observation.payload.extras.command}}</cmd>",
-        "no": "Kjørte <cmd>{{observation.payload.extras.command}}</cmd>",
-        "ar": "تم تشغيل <cmd>{{observation.payload.extras.command}}</cmd>",
-        "de": "Führte <cmd>{{observation.payload.extras.command}}</cmd> aus",
-        "fr": "A exécuté <cmd>{{observation.payload.extras.command}}</cmd>",
-        "it": "Ha eseguito <cmd>{{observation.payload.extras.command}}</cmd>",
-        "pt": "Executou <cmd>{{observation.payload.extras.command}}</cmd>",
-        "es": "Ejecutó <cmd>{{observation.payload.extras.command}}</cmd>",
-        "tr": "<cmd>{{observation.payload.extras.command}}</cmd> çalıştırıldı",
-        "uk": "Запустив <cmd>{{observation.payload.extras.command}}</cmd>"
+        "en": "Ran <cmd>{{command}}</cmd>",
+        "zh-CN": "运行 <cmd>{{command}}</cmd>",
+        "zh-TW": "執行 <cmd>{{command}}</cmd>",
+        "ko-KR": "실행 <cmd>{{command}}</cmd>",
+        "ja": "実行 <cmd>{{command}}</cmd>",
+        "no": "Kjørte <cmd>{{command}}</cmd>",
+        "ar": "تم تشغيل <cmd>{{command}}</cmd>",
+        "de": "Führte <cmd>{{command}}</cmd> aus",
+        "fr": "A exécuté <cmd>{{command}}</cmd>",
+        "it": "Ha eseguito <cmd>{{command}}</cmd>",
+        "pt": "Executou <cmd>{{command}}</cmd>",
+        "es": "Ejecutó <cmd>{{command}}</cmd>",
+        "tr": "<cmd>{{command}}</cmd> çalıştırıldı",
+        "uk": "Запустив <cmd>{{command}}</cmd>"
    },
    "OBSERVATION_MESSAGE$RUN_IPYTHON": {
        "en": "Ran a Python command",
@@ -6569,52 +6576,52 @@
        "uk": "Виконав команду Python"
    },
    "OBSERVATION_MESSAGE$READ": {
-        "en": "Read <path>{{observation.payload.extras.path}}</path>",
-        "zh-CN": "读取 <path>{{observation.payload.extras.path}}</path>",
-        "zh-TW": "讀取 <path>{{observation.payload.extras.path}}</path>",
-        "ko-KR": "읽기 <path>{{observation.payload.extras.path}}</path>",
-        "ja": "読み取り <path>{{observation.payload.extras.path}}</path>",
-        "no": "Leste <path>{{observation.payload.extras.path}}</path>",
-        "ar": "تمت قراءة <path>{{observation.payload.extras.path}}</path>",
-        "de": "Las <path>{{observation.payload.extras.path}}</path>",
-        "fr": "A lu <path>{{observation.payload.extras.path}}</path>",
-        "it": "Ha letto <path>{{observation.payload.extras.path}}</path>",
-        "pt": "Leu <path>{{observation.payload.extras.path}}</path>",
-        "es": "Leyó <path>{{observation.payload.extras.path}}</path>",
-        "tr": "<path>{{observation.payload.extras.path}}</path> okundu",
-        "uk": "Прочитав <path>{{observation.payload.extras.path}}</path>"
+        "en": "Read <path>{{path}}</path>",
+        "zh-CN": "读取 <path>{{path}}</path>",
+        "zh-TW": "讀取 <path>{{path}}</path>",
+        "ko-KR": "읽기 <path>{{path}}</path>",
+        "ja": "読み取り <path>{{path}}</path>",
+        "no": "Leste <path>{{path}}</path>",
+        "ar": "تمت قراءة <path>{{path}}</path>",
+        "de": "Las <path>{{path}}</path>",
+        "fr": "A lu <path>{{path}}</path>",
+        "it": "Ha letto <path>{{path}}</path>",
+        "pt": "Leu <path>{{path}}</path>",
+        "es": "Leyó <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> okundu",
+        "uk": "Прочитав <path>{{path}}</path>"
    },
    "OBSERVATION_MESSAGE$EDIT": {
-        "en": "Edited <path>{{observation.payload.extras.path}}</path>",
-        "zh-CN": "编辑 <path>{{observation.payload.extras.path}}</path>",
-        "zh-TW": "編輯 <path>{{observation.payload.extras.path}}</path>",
-        "ko-KR": "편집 <path>{{observation.payload.extras.path}}</path>",
-        "ja": "編集 <path>{{observation.payload.extras.path}}</path>",
-        "no": "Redigerte <path>{{observation.payload.extras.path}}</path>",
-        "ar": "تم تحرير <path>{{observation.payload.extras.path}}</path>",
-        "de": "Hat <path>{{observation.payload.extras.path}}</path> bearbeitet",
-        "fr": "A modifié <path>{{observation.payload.extras.path}}</path>",
-        "it": "Ha modificato <path>{{observation.payload.extras.path}}</path>",
-        "pt": "Editou <path>{{observation.payload.extras.path}}</path>",
-        "es": "Editó <path>{{observation.payload.extras.path}}</path>",
-        "tr": "<path>{{observation.payload.extras.path}}</path> düzenlendi",
-        "uk": "Відредагував <path>{{observation.payload.extras.path}}</path>"
+        "en": "Edited <path>{{path}}</path>",
+        "zh-CN": "编辑 <path>{{path}}</path>",
+        "zh-TW": "編輯 <path>{{path}}</path>",
+        "ko-KR": "편집 <path>{{path}}</path>",
+        "ja": "編集 <path>{{path}}</path>",
+        "no": "Redigerte <path>{{path}}</path>",
+        "ar": "تم تحرير <path>{{path}}</path>",
+        "de": "Hat <path>{{path}}</path> bearbeitet",
+        "fr": "A modifié <path>{{path}}</path>",
+        "it": "Ha modificato <path>{{path}}</path>",
+        "pt": "Editou <path>{{path}}</path>",
+        "es": "Editó <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> düzenlendi",
+        "uk": "Відредагував <path>{{path}}</path>"
    },
    "OBSERVATION_MESSAGE$WRITE": {
-        "en": "Wrote to <path>{{observation.payload.extras.path}}</path>",
-        "zh-CN": "写入 <path>{{observation.payload.extras.path}}</path>",
-        "zh-TW": "寫入 <path>{{observation.payload.extras.path}}</path>",
-        "ko-KR": "쓰기 <path>{{observation.payload.extras.path}}</path>",
-        "ja": "書き込み <path>{{observation.payload.extras.path}}</path>",
-        "no": "Skrev til <path>{{observation.payload.extras.path}}</path>",
-        "ar": "تمت الكتابة إلى <path>{{observation.payload.extras.path}}</path>",
-        "de": "Hat in <path>{{observation.payload.extras.path}}</path> geschrieben",
-        "fr": "A écrit dans <path>{{observation.payload.extras.path}}</path>",
-        "it": "Ha scritto su <path>{{observation.payload.extras.path}}</path>",
-        "pt": "Escreveu em <path>{{observation.payload.extras.path}}</path>",
-        "es": "Escribió en <path>{{observation.payload.extras.path}}</path>",
-        "tr": "<path>{{observation.payload.extras.path}}</path> dosyasına yazıldı",
-        "uk": "Записав на <path>{{observation.payload.extras.path}}</path>"
+        "en": "Wrote to <path>{{path}}</path>",
+        "zh-CN": "写入 <path>{{path}}</path>",
+        "zh-TW": "寫入 <path>{{path}}</path>",
+        "ko-KR": "쓰기 <path>{{path}}</path>",
+        "ja": "書き込み <path>{{path}}</path>",
+        "no": "Skrev til <path>{{path}}</path>",
+        "ar": "تمت الكتابة إلى <path>{{path}}</path>",
+        "de": "Hat in <path>{{path}}</path> geschrieben",
+        "fr": "A écrit dans <path>{{path}}</path>",
+        "it": "Ha scritto su <path>{{path}}</path>",
+        "pt": "Escreveu em <path>{{path}}</path>",
+        "es": "Escribió en <path>{{path}}</path>",
+        "tr": "<path>{{path}}</path> dosyasına yazıldı",
+        "uk": "Записав на <path>{{path}}</path>"
    },
    "OBSERVATION_MESSAGE$BROWSE": {
        "en": "Browsing completed",
@@ -6649,20 +6656,20 @@
        "uk": "Результат інструменту MCP: {{action.payload.args.name}}"
    },
    "OBSERVATION_MESSAGE$RECALL": {
-        "en": "Microagent Activated",
-        "ja": "マイクロエージェントが有効化されました",
-        "zh-CN": "微代理已激活",
-        "zh-TW": "微代理已啟動",
-        "ko-KR": "마이크로에이전트 활성화됨",
-        "no": "MikroAgent aktivert",
-        "it": "Microagent attivato",
-        "pt": "Microagent ativado",
-        "es": "Microagent activado",
-        "ar": "تم تنشيط الوكيل المصغر",
-        "fr": "Microagent activé",
-        "tr": "MikroAjan Etkinleştirildi",
-        "de": "Microagent aktiviert",
-        "uk": "Мікроагент активований"
+        "en": "Microagent ready",
+        "ja": "マイクロエージェントの準備完了",
+        "zh-CN": "微代理已就绪",
+        "zh-TW": "微代理已就緒",
+        "ko-KR": "마이크로에이전트 준비됨",
+        "no": "MikroAgent klar",
+        "it": "Microagent pronto",
+        "pt": "Microagent pronto",
+        "es": "Microagent listo",
+        "ar": "الوكيل المصغر جاهز",
+        "fr": "Microagent prêt",
+        "tr": "MikroAjan hazır",
+        "de": "Microagent bereit",
+        "uk": "Мікроагент готовий"
    },
    "EXPANDABLE_MESSAGE$SHOW_DETAILS": {
        "en": "Show details",
--- a/frontend/src/routes/conversation.tsx
+++ b/frontend/src/routes/conversation.tsx
@@ -13,7 +13,6 @@ import {
  useConversation,
 } from "#/context/conversation-context";
 import { Controls } from "#/components/features/controls/controls";
-import { clearMessages, addUserMessage } from "#/state/chat-slice";
 import { clearTerminal } from "#/state/command-slice";
 import { useEffectOnce } from "#/hooks/use-effect-once";
 import GlobeIcon from "#/icons/globe.svg?react";
@@ -34,7 +33,6 @@ import Security from "#/components/shared/modals/security/security";
 import { useUserConversation } from "#/hooks/query/use-user-conversation";
 import { ServedAppLabel } from "#/components/layout/served-app-label";
 import { useSettings } from "#/hooks/query/use-settings";
-import { clearFiles, clearInitialPrompt } from "#/state/initial-query-slice";
 import { RootState } from "#/store";
 import { displayErrorToast } from "#/utils/custom-toast-handlers";
 import { useDocumentTitleFromState } from "#/hooks/use-document-title-from-state";
@@ -49,9 +47,7 @@ function AppContent() {
  const { data: conversation, isFetched } = useUserConversation(
    conversationId || null,
  );
-  const { initialPrompt, files } = useSelector(
-    (state: RootState) => state.initialQuery,
-  );
+
  const { curAgentState } = useSelector((state: RootState) => state.agent);
  const dispatch = useDispatch();
  const navigate = useNavigate();
@@ -71,25 +67,11 @@ function AppContent() {
  }, [conversation, isFetched]);

  React.useEffect(() => {
-    dispatch(clearMessages());
    dispatch(clearTerminal());
    dispatch(clearJupyter());
-    if (conversationId && (initialPrompt || files.length > 0)) {
-      dispatch(
-        addUserMessage({
-          content: initialPrompt || "",
-          imageUrls: files || [],
-          timestamp: new Date().toISOString(),
-          pending: true,
-        }),
-      );
-      dispatch(clearInitialPrompt());
-      dispatch(clearFiles());
-    }
  }, [conversationId]);

  useEffectOnce(() => {
-    dispatch(clearMessages());
    dispatch(clearTerminal());
    dispatch(clearJupyter());
  });
--- a/frontend/src/routes/root-layout.tsx
+++ b/frontend/src/routes/root-layout.tsx
@@ -58,7 +58,7 @@ export function ErrorBoundary() {
 export default function MainApp() {
  const navigate = useNavigate();
  const { pathname } = useLocation();
-  const tosPageStatus = useIsOnTosPage();
+  const isOnTosPage = useIsOnTosPage();
  const { data: settings } = useSettings();
  const { error } = useBalance();
  const { migrateUserConsent } = useMigrateUserConsent();
@@ -68,7 +68,7 @@ export default function MainApp() {
  const {
    data: isAuthed,
    isFetching: isFetchingAuth,
-    isError: authError,
+    isError: isAuthError,
  } = useIsAuthed();

  // Always call the hook, but we'll only use the result when not on TOS page
@@ -78,30 +78,30 @@ export default function MainApp() {
  });

  // When on TOS page, we don't use the GitHub auth URL
-  const effectiveGitHubAuthUrl = tosPageStatus ? null : gitHubAuthUrl;
+  const effectiveGitHubAuthUrl = isOnTosPage ? null : gitHubAuthUrl;

  const [consentFormIsOpen, setConsentFormIsOpen] = React.useState(false);

  React.useEffect(() => {
    // Don't change language when on TOS page
-    if (!tosPageStatus && settings?.LANGUAGE) {
+    if (!isOnTosPage && settings?.LANGUAGE) {
      i18n.changeLanguage(settings.LANGUAGE);
    }
-  }, [settings?.LANGUAGE, tosPageStatus]);
+  }, [settings?.LANGUAGE, isOnTosPage]);

  React.useEffect(() => {
    // Don't show consent form when on TOS page
-    if (!tosPageStatus) {
+    if (!isOnTosPage) {
      const consentFormModalIsOpen =
        settings?.USER_CONSENTS_TO_ANALYTICS === null;

      setConsentFormIsOpen(consentFormModalIsOpen);
    }
-  }, [settings, tosPageStatus]);
+  }, [settings, isOnTosPage]);

  React.useEffect(() => {
    // Don't migrate user consent when on TOS page
-    if (!tosPageStatus) {
+    if (!isOnTosPage) {
      // Migrate user consent to the server if it was previously stored in localStorage
      migrateUserConsent({
        handleAnalyticsWasPresentInLocalStorage: () => {
@@ -109,7 +109,7 @@ export default function MainApp() {
        },
      });
    }
-  }, [tosPageStatus]);
+  }, [isOnTosPage]);

  React.useEffect(() => {
    if (settings?.IS_NEW_USER && config.data?.APP_MODE === "saas") {
@@ -120,22 +120,16 @@ export default function MainApp() {
  React.useEffect(() => {
    // Don't do any redirects when on TOS page
    // Don't allow users to use the app if it 402s
-    if (!tosPageStatus && error?.status === 402 && pathname !== "/") {
+    if (!isOnTosPage && error?.status === 402 && pathname !== "/") {
      navigate("/");
    }
-  }, [error?.status, pathname, tosPageStatus]);
+  }, [error?.status, pathname, isOnTosPage]);

-  // When on TOS page, we don't make any API calls, so we need to handle this case
-  const userIsAuthed = tosPageStatus ? false : !!isAuthed && !authError;
-
-  // Only show the auth modal if:
-  // 1. User is not authenticated
-  // 2. We're not currently on the TOS page
-  // 3. We're in SaaS mode
  const renderAuthModal =
+    !isAuthed &&
+    !isAuthError &&
    !isFetchingAuth &&
-    !userIsAuthed &&
-    !tosPageStatus &&
+    !isOnTosPage &&
    config.data?.APP_MODE === "saas";

  return (
--- a/frontend/src/services/tests/actions.test.ts
+++ b/frontend/src/services/tests/actions.test.ts
@@ -4,7 +4,6 @@ import { StatusMessage } from "#/types/message";
 import { queryClient } from "#/query-client-config";
 import store from "#/store";
 import { setCurStatusMessage } from "#/state/status-slice";
-import { addErrorMessage } from "#/state/chat-slice";
 import { trackError } from "#/utils/error-handler";

 // Mock dependencies
@@ -101,9 +100,6 @@ describe("handleStatusMessage", () => {
      metadata: { msgId: "ERROR_ID" },
    });

-    // Verify that store.dispatch was called with addErrorMessage
-    expect(store.dispatch).toHaveBeenCalledWith(addErrorMessage(statusMessage));
-
    // Verify that queryClient.invalidateQueries was not called
    expect(queryClient.invalidateQueries).not.toHaveBeenCalled();
  });
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -1,13 +1,5 @@
-import {
-  addAssistantMessage,
-  addAssistantAction,
-  addUserMessage,
-  addErrorMessage,
-} from "#/state/chat-slice";
 import { trackError } from "#/utils/error-handler";
 import { appendSecurityAnalyzerInput } from "#/state/security-analyzer-slice";
-import { setCode, setActiveFilepath } from "#/state/code-slice";
-import { appendJupyterInput } from "#/state/jupyter-slice";
 import { setCurStatusMessage } from "#/state/status-slice";
 import { setMetrics } from "#/state/metrics-slice";
 import store from "#/store";
@@ -21,67 +13,6 @@ import { handleObservationMessage } from "./observations";
 import { appendInput } from "#/state/command-slice";
 import { queryClient } from "#/query-client-config";

-const messageActions = {
-  [ActionType.BROWSE]: (message: ActionMessage) => {
-    if (!message.args.thought && message.message) {
-      store.dispatch(addAssistantMessage(message.message));
-    }
-  },
-  [ActionType.BROWSE_INTERACTIVE]: (message: ActionMessage) => {
-    if (!message.args.thought && message.message) {
-      store.dispatch(addAssistantMessage(message.message));
-    }
-  },
-  [ActionType.WRITE]: (message: ActionMessage) => {
-    const { path, content } = message.args;
-    store.dispatch(setActiveFilepath(path));
-    store.dispatch(setCode(content));
-  },
-  [ActionType.MESSAGE]: (message: ActionMessage) => {
-    if (message.source === "user") {
-      store.dispatch(
-        addUserMessage({
-          content: message.args.content,
-          imageUrls:
-            typeof message.args.image_urls === "string"
-              ? [message.args.image_urls]
-              : message.args.image_urls,
-          timestamp: message.timestamp,
-          pending: false,
-        }),
-      );
-    } else {
-      store.dispatch(addAssistantMessage(message.args.content));
-    }
-  },
-  [ActionType.RUN_IPYTHON]: (message: ActionMessage) => {
-    if (message.args.confirmation_state !== "rejected") {
-      store.dispatch(appendJupyterInput(message.args.code));
-    }
-  },
-  [ActionType.FINISH]: (message: ActionMessage) => {
-    store.dispatch(addAssistantMessage(message.args.final_thought));
-    let successPrediction = "";
-    if (message.args.task_completed === "partial") {
-      successPrediction =
-        "I believe that the task was **completed partially**.";
-    } else if (message.args.task_completed === "false") {
-      successPrediction = "I believe that the task was **not completed**.";
-    } else if (message.args.task_completed === "true") {
-      successPrediction =
-        "I believe that the task was **completed successfully**.";
-    }
-    if (successPrediction) {
-      // if final_thought is not empty, add a new line before the success prediction
-      if (message.args.final_thought) {
-        store.dispatch(addAssistantMessage(`\n${successPrediction}`));
-      } else {
-        store.dispatch(addAssistantMessage(successPrediction));
-      }
-    }
-  },
-};
-
 export function handleActionMessage(message: ActionMessage) {
  if (message.args?.hidden) {
    return;
@@ -103,26 +34,6 @@ export function handleActionMessage(message: ActionMessage) {
  if ("args" in message && "security_risk" in message.args) {
    store.dispatch(appendSecurityAnalyzerInput(message));
  }
-
-  if (message.source === "agent") {
-    // Only add thought as a message if it's not a "think" action
-    if (
-      message.args &&
-      message.args.thought &&
-      message.action !== ActionType.THINK
-    ) {
-      store.dispatch(addAssistantMessage(message.args.thought));
-    }
-    // Need to convert ActionMessage to RejectAction
-    // @ts-expect-error TODO: fix
-    store.dispatch(addAssistantAction(message));
-  }
-
-  if (message.action in messageActions) {
-    const actionFn =
-      messageActions[message.action as keyof typeof messageActions];
-    actionFn(message);
-  }
 }

 export function handleStatusMessage(message: StatusMessage) {
@@ -146,11 +57,6 @@ export function handleStatusMessage(message: StatusMessage) {
      source: "chat",
      metadata: { msgId: message.id },
    });
-    store.dispatch(
-      addErrorMessage({
-        ...message,
-      }),
-    );
  }
 }

@@ -161,33 +67,5 @@ export function handleAssistantMessage(message: Record<string, unknown>) {
    handleObservationMessage(message as unknown as ObservationMessage);
  } else if (message.status_update) {
    handleStatusMessage(message as unknown as StatusMessage);
-  } else if (message.error) {
-    // Handle error messages from the server
-    const errorMessage =
-      typeof message.message === "string"
-        ? message.message
-        : String(message.message || "Unknown error");
-    trackError({
-      message: errorMessage,
-      source: "websocket",
-      metadata: { raw_message: message },
-    });
-    store.dispatch(
-      addErrorMessage({
-        message: errorMessage,
-      }),
-    );
-  } else {
-    const errorMsg = "Unknown message type received";
-    trackError({
-      message: errorMsg,
-      source: "chat",
-      metadata: { raw_message: message },
-    });
-    store.dispatch(
-      addErrorMessage({
-        message: errorMsg,
-      }),
-    );
  }
 }
--- a/frontend/src/services/observations.ts
+++ b/frontend/src/services/observations.ts
@@ -2,14 +2,9 @@ import { setCurrentAgentState } from "#/state/agent-slice";
 import { setUrl, setScreenshotSrc } from "#/state/browser-slice";
 import store from "#/store";
 import { ObservationMessage } from "#/types/message";
-import { AgentState } from "#/types/agent-state";
 import { appendOutput } from "#/state/command-slice";
 import { appendJupyterOutput } from "#/state/jupyter-slice";
 import ObservationType from "#/types/observation-type";
-import {
-  addAssistantMessage,
-  addAssistantObservation,
-} from "#/state/chat-slice";

 export function handleObservationMessage(message: ObservationMessage) {
  switch (message.observation) {
@@ -26,8 +21,14 @@ export function handleObservationMessage(message: ObservationMessage) {
      break;
    }
    case ObservationType.RUN_IPYTHON:
-      // FIXME: render this as markdown
-      store.dispatch(appendJupyterOutput(message.content));
+      store.dispatch(
+        appendJupyterOutput({
+          content: message.content,
+          imageUrls: Array.isArray(message.extras?.image_urls)
+            ? message.extras.image_urls
+            : undefined,
+        }),
+      );
      break;
    case ObservationType.BROWSE:
    case ObservationType.BROWSE_INTERACTIVE:
@@ -42,11 +43,6 @@ export function handleObservationMessage(message: ObservationMessage) {
      store.dispatch(setCurrentAgentState(message.extras.agent_state));
      break;
    case ObservationType.DELEGATE:
-      // TODO: better UI for delegation result (#2309)
-      if (message.content) {
-        store.dispatch(addAssistantMessage(message.content));
-      }
-      break;
    case ObservationType.READ:
    case ObservationType.EDIT:
    case ObservationType.THINK:
@@ -56,107 +52,13 @@ export function handleObservationMessage(message: ObservationMessage) {
    case ObservationType.MCP:
      break; // We don't display the default message for these observations
    default:
-      store.dispatch(addAssistantMessage(message.message));
      break;
  }
  if (!message.extras?.hidden) {
    // Convert the message to the appropriate observation type
    const { observation } = message;
-    const baseObservation = {
-      ...message,
-      source: "agent" as const,
-    };

    switch (observation) {
-      case "agent_state_changed":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "agent_state_changed" as const,
-            extras: {
-              agent_state: (message.extras.agent_state as AgentState) || "idle",
-            },
-          }),
-        );
-        break;
-      case "recall":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "recall" as const,
-            extras: {
-              ...(message.extras || {}),
-              recall_type:
-                (message.extras?.recall_type as
-                  | "workspace_context"
-                  | "knowledge") || "knowledge",
-            },
-          }),
-        );
-        break;
-      case "run":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "run" as const,
-            extras: {
-              command: String(message.extras.command || ""),
-              metadata: message.extras.metadata,
-              hidden: Boolean(message.extras.hidden),
-            },
-          }),
-        );
-        break;
-      case "read":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation,
-            extras: {
-              path: String(message.extras.path || ""),
-              impl_source: String(message.extras.impl_source || ""),
-            },
-          }),
-        );
-        break;
-      case "edit":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation,
-            extras: {
-              path: String(message.extras.path || ""),
-              diff: String(message.extras.diff || ""),
-              impl_source: String(message.extras.impl_source || ""),
-            },
-          }),
-        );
-        break;
-      case "run_ipython":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "run_ipython" as const,
-            extras: {
-              code: String(message.extras.code || ""),
-            },
-          }),
-        );
-        break;
-      case "delegate":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "delegate" as const,
-            extras: {
-              outputs:
-                typeof message.extras.outputs === "object"
-                  ? (message.extras.outputs as Record<string, unknown>)
-                  : {},
-            },
-          }),
-        );
-        break;
      case "browse":
        if (message.extras?.screenshot) {
          store.dispatch(setScreenshotSrc(message.extras.screenshot));
@@ -164,45 +66,6 @@ export function handleObservationMessage(message: ObservationMessage) {
        if (message.extras?.url) {
          store.dispatch(setUrl(message.extras.url));
        }
-
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "browse" as const,
-            extras: {
-              url: String(message.extras.url || ""),
-              screenshot: String(message.extras.screenshot || ""),
-              error: Boolean(message.extras.error),
-              open_page_urls: Array.isArray(message.extras.open_page_urls)
-                ? message.extras.open_page_urls
-                : [],
-              active_page_index: Number(message.extras.active_page_index || 0),
-              dom_object:
-                typeof message.extras.dom_object === "object"
-                  ? (message.extras.dom_object as Record<string, unknown>)
-                  : {},
-              axtree_object:
-                typeof message.extras.axtree_object === "object"
-                  ? (message.extras.axtree_object as Record<string, unknown>)
-                  : {},
-              extra_element_properties:
-                typeof message.extras.extra_element_properties === "object"
-                  ? (message.extras.extra_element_properties as Record<
-                      string,
-                      unknown
-                    >)
-                  : {},
-              last_browser_action: String(
-                message.extras.last_browser_action || "",
-              ),
-              last_browser_action_error:
-                message.extras.last_browser_action_error,
-              focused_element_bid: String(
-                message.extras.focused_element_bid || "",
-              ),
-            },
-          }),
-        );
        break;
      case "browse_interactive":
        if (message.extras?.screenshot) {
@@ -211,65 +74,6 @@ export function handleObservationMessage(message: ObservationMessage) {
        if (message.extras?.url) {
          store.dispatch(setUrl(message.extras.url));
        }
-
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "browse_interactive" as const,
-            extras: {
-              url: String(message.extras.url || ""),
-              screenshot: String(message.extras.screenshot || ""),
-              error: Boolean(message.extras.error),
-              open_page_urls: Array.isArray(message.extras.open_page_urls)
-                ? message.extras.open_page_urls
-                : [],
-              active_page_index: Number(message.extras.active_page_index || 0),
-              dom_object:
-                typeof message.extras.dom_object === "object"
-                  ? (message.extras.dom_object as Record<string, unknown>)
-                  : {},
-              axtree_object:
-                typeof message.extras.axtree_object === "object"
-                  ? (message.extras.axtree_object as Record<string, unknown>)
-                  : {},
-              extra_element_properties:
-                typeof message.extras.extra_element_properties === "object"
-                  ? (message.extras.extra_element_properties as Record<
-                      string,
-                      unknown
-                    >)
-                  : {},
-              last_browser_action: String(
-                message.extras.last_browser_action || "",
-              ),
-              last_browser_action_error:
-                message.extras.last_browser_action_error,
-              focused_element_bid: String(
-                message.extras.focused_element_bid || "",
-              ),
-            },
-          }),
-        );
-        break;
-      case "error":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "error" as const,
-            source: "user" as const,
-            extras: {
-              error_id: message.extras.error_id,
-            },
-          }),
-        );
-        break;
-      case "mcp":
-        store.dispatch(
-          addAssistantObservation({
-            ...baseObservation,
-            observation: "mcp" as const,
-          }),
-        );
        break;
      default:
        // For any unhandled observation types, just ignore them
--- a/frontend/src/state/chat-slice.ts
+++ b/frontend/src/state/chat-slice.ts
@@ -1,369 +0,0 @@
-import { createSlice, PayloadAction } from "@reduxjs/toolkit";
-import type { Message } from "#/message";
-
-import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
-import { OpenHandsAction } from "#/types/core/actions";
-import { OpenHandsEventType } from "#/types/core/base";
-import {
-  CommandObservation,
-  IPythonObservation,
-  OpenHandsObservation,
-  RecallObservation,
-} from "#/types/core/observations";
-
-type SliceState = {
-  messages: Message[];
-  systemMessage: {
-    content: string;
-    tools: Array<Record<string, unknown>> | null;
-    openhands_version: string | null;
-    agent_class: string | null;
-  } | null;
-};
-
-const MAX_CONTENT_LENGTH = 1000;
-
-const HANDLED_ACTIONS: OpenHandsEventType[] = [
-  "run",
-  "run_ipython",
-  "write",
-  "read",
-  "browse",
-  "browse_interactive",
-  "edit",
-  "recall",
-  "think",
-  "system",
-  "call_tool_mcp",
-  "mcp",
-];
-
-function getRiskText(risk: ActionSecurityRisk) {
-  switch (risk) {
-    case ActionSecurityRisk.LOW:
-      return "Low Risk";
-    case ActionSecurityRisk.MEDIUM:
-      return "Medium Risk";
-    case ActionSecurityRisk.HIGH:
-      return "High Risk";
-    case ActionSecurityRisk.UNKNOWN:
-    default:
-      return "Unknown Risk";
-  }
-}
-
-const initialState: SliceState = {
-  messages: [],
-  systemMessage: null,
-};
-
-export const chatSlice = createSlice({
-  name: "chat",
-  initialState,
-  reducers: {
-    addUserMessage(
-      state,
-      action: PayloadAction<{
-        content: string;
-        imageUrls: string[];
-        timestamp: string;
-        pending?: boolean;
-      }>,
-    ) {
-      const message: Message = {
-        type: "thought",
-        sender: "user",
-        content: action.payload.content,
-        imageUrls: action.payload.imageUrls,
-        timestamp: action.payload.timestamp || new Date().toISOString(),
-        pending: !!action.payload.pending,
-      };
-      // Remove any pending messages
-      let i = state.messages.length;
-      while (i) {
-        i -= 1;
-        const m = state.messages[i] as Message;
-        if (m.pending) {
-          state.messages.splice(i, 1);
-        }
-      }
-      state.messages.push(message);
-    },
-
-    addAssistantMessage(state: SliceState, action: PayloadAction<string>) {
-      const message: Message = {
-        type: "thought",
-        sender: "assistant",
-        content: action.payload,
-        imageUrls: [],
-        timestamp: new Date().toISOString(),
-        pending: false,
-      };
-      state.messages.push(message);
-    },
-
-    addAssistantAction(
-      state: SliceState,
-      action: PayloadAction<OpenHandsAction>,
-    ) {
-      const actionID = action.payload.action;
-      if (!HANDLED_ACTIONS.includes(actionID)) {
-        return;
-      }
-      const translationID = `ACTION_MESSAGE$${actionID.toUpperCase()}`;
-      let text = "";
-
-      if (actionID === "system") {
-        // Store the system message in the state
-        state.systemMessage = {
-          content: action.payload.args.content,
-          tools: action.payload.args.tools,
-          openhands_version: action.payload.args.openhands_version,
-          agent_class: action.payload.args.agent_class,
-        };
-        // Don't add a message for system actions
-        return;
-      }
-      if (actionID === "run") {
-        text = `Command:\n\`${action.payload.args.command}\``;
-      } else if (actionID === "run_ipython") {
-        text = `\`\`\`\n${action.payload.args.code}\n\`\`\``;
-      } else if (actionID === "write") {
-        let { content } = action.payload.args;
-        if (content.length > MAX_CONTENT_LENGTH) {
-          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
-        }
-        text = `${action.payload.args.path}\n${content}`;
-      } else if (actionID === "browse") {
-        text = `Browsing ${action.payload.args.url}`;
-      } else if (actionID === "browse_interactive") {
-        // Include the browser_actions in the content
-        text = `**Action:**\n\n\`\`\`python\n${action.payload.args.browser_actions}\n\`\`\``;
-      } else if (actionID === "recall") {
-        // skip recall actions
-        return;
-      } else if (actionID === "call_tool_mcp") {
-        // Format MCP action with name and arguments
-        const name = action.payload.args.name || "";
-        const args = action.payload.args.arguments || {};
-        text = `**MCP Tool Call:** ${name}\n\n`;
-        // Include thought if available
-        if (action.payload.args.thought) {
-          text += `\n\n**Thought:**\n${action.payload.args.thought}`;
-        }
-        text += `\n\n**Arguments:**\n\`\`\`json\n${JSON.stringify(args, null, 2)}\n\`\`\``;
-      }
-      if (actionID === "run" || actionID === "run_ipython") {
-        if (
-          action.payload.args.confirmation_state === "awaiting_confirmation"
-        ) {
-          text += `\n\n${getRiskText(action.payload.args.security_risk as unknown as ActionSecurityRisk)}`;
-        }
-      } else if (actionID === "think") {
-        text = action.payload.args.thought;
-      }
-      const message: Message = {
-        type: "action",
-        sender: "assistant",
-        translationID,
-        eventID: action.payload.id,
-        content: text,
-        imageUrls: [],
-        timestamp: new Date().toISOString(),
-        action,
-      };
-
-      state.messages.push(message);
-    },
-
-    addAssistantObservation(
-      state: SliceState,
-      observation: PayloadAction<OpenHandsObservation>,
-    ) {
-      const observationID = observation.payload.observation;
-      if (!HANDLED_ACTIONS.includes(observationID)) {
-        return;
-      }
-
-      // Special handling for RecallObservation - create a new message instead of updating an existing one
-      if (observationID === "recall") {
-        const recallObs = observation.payload as RecallObservation;
-        let content = ``;
-
-        // Handle workspace context
-        if (recallObs.extras.recall_type === "workspace_context") {
-          if (recallObs.extras.repo_name) {
-            content += `\n\n**Repository:** ${recallObs.extras.repo_name}`;
-          }
-          if (recallObs.extras.repo_directory) {
-            content += `\n\n**Directory:** ${recallObs.extras.repo_directory}`;
-          }
-          if (recallObs.extras.date) {
-            content += `\n\n**Date:** ${recallObs.extras.date}`;
-          }
-          if (
-            recallObs.extras.runtime_hosts &&
-            Object.keys(recallObs.extras.runtime_hosts).length > 0
-          ) {
-            content += `\n\n**Available Hosts**`;
-            for (const [host, port] of Object.entries(
-              recallObs.extras.runtime_hosts,
-            )) {
-              content += `\n\n- ${host} (port ${port})`;
-            }
-          }
-          if (recallObs.extras.repo_instructions) {
-            content += `\n\n**Repository Instructions:**\n\n${recallObs.extras.repo_instructions}`;
-          }
-          if (recallObs.extras.additional_agent_instructions) {
-            content += `\n\n**Additional Instructions:**\n\n${recallObs.extras.additional_agent_instructions}`;
-          }
-        }
-
-        // Create a new message for the observation
-        // Use the correct translation ID format that matches what's in the i18n file
-        const translationID = `OBSERVATION_MESSAGE$${observationID.toUpperCase()}`;
-
-        // Handle microagent knowledge
-        if (
-          recallObs.extras.microagent_knowledge &&
-          recallObs.extras.microagent_knowledge.length > 0
-        ) {
-          content += `\n\n**Triggered Microagent Knowledge:**`;
-          for (const knowledge of recallObs.extras.microagent_knowledge) {
-            content += `\n\n- **${knowledge.name}** (triggered by keyword: ${knowledge.trigger})\n\n\`\`\`\n${knowledge.content}\n\`\`\``;
-          }
-        }
-
-        const message: Message = {
-          type: "action",
-          sender: "assistant",
-          translationID,
-          eventID: observation.payload.id,
-          content,
-          imageUrls: [],
-          timestamp: new Date().toISOString(),
-          success: true,
-        };
-
-        state.messages.push(message);
-        return; // Skip the normal observation handling below
-      }
-
-      // Normal handling for other observation types
-      const translationID = `OBSERVATION_MESSAGE$${observationID.toUpperCase()}`;
-      const causeID = observation.payload.cause;
-      const causeMessage = state.messages.find(
-        (message) => message.eventID === causeID,
-      );
-      if (!causeMessage) {
-        return;
-      }
-      causeMessage.translationID = translationID;
-      causeMessage.observation = observation;
-      // Set success property based on observation type
-      if (observationID === "run") {
-        const commandObs = observation.payload as CommandObservation;
-        // If exit_code is -1, it means the command timed out, so we set success to undefined
-        // to not show any status indicator
-        if (commandObs.extras.metadata.exit_code === -1) {
-          causeMessage.success = undefined;
-        } else {
-          causeMessage.success = commandObs.extras.metadata.exit_code === 0;
-        }
-      } else if (observationID === "run_ipython") {
-        // For IPython, we consider it successful if there's no error message
-        const ipythonObs = observation.payload as IPythonObservation;
-        causeMessage.success = !ipythonObs.content
-          .toLowerCase()
-          .includes("error:");
-      } else if (observationID === "read" || observationID === "edit") {
-        // For read/edit operations, we consider it successful if there's content and no error
-
-        if (observation.payload.extras.impl_source === "oh_aci") {
-          causeMessage.success =
-            observation.payload.content.length > 0 &&
-            !observation.payload.content.startsWith("ERROR:\n");
-        } else {
-          causeMessage.success =
-            observation.payload.content.length > 0 &&
-            !observation.payload.content.toLowerCase().includes("error:");
-        }
-      }
-
-      if (observationID === "run" || observationID === "run_ipython") {
-        let { content } = observation.payload;
-        if (content.length > MAX_CONTENT_LENGTH) {
-          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
-        }
-        content = `${causeMessage.content}\n\nOutput:\n\`\`\`\n${content.trim() || "[Command finished execution with no output]"}\n\`\`\``;
-        causeMessage.content = content; // Observation content includes the action
-      } else if (observationID === "read") {
-        causeMessage.content = `\`\`\`\n${observation.payload.content}\n\`\`\``; // Content is already truncated by the ACI
-      } else if (observationID === "edit") {
-        if (causeMessage.success) {
-          causeMessage.content = `\`\`\`diff\n${observation.payload.extras.diff}\n\`\`\``; // Content is already truncated by the ACI
-        } else {
-          causeMessage.content = observation.payload.content;
-        }
-      } else if (observationID === "browse") {
-        let content = `**URL:** ${observation.payload.extras.url}\n`;
-        if (observation.payload.extras.error) {
-          content += `\n\n**Error:**\n${observation.payload.extras.error}\n`;
-        }
-        content += `\n\n**Output:**\n${observation.payload.content}`;
-        if (content.length > MAX_CONTENT_LENGTH) {
-          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...(truncated)`;
-        }
-        causeMessage.content = content;
-      } else if (observationID === "mcp") {
-        // For MCP observations, we want to show the content as formatted output
-        // similar to how run/run_ipython actions are handled
-        let { content } = observation.payload;
-        if (content.length > MAX_CONTENT_LENGTH) {
-          content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
-        }
-        content = `${causeMessage.content}\n\n**Output:**\n\`\`\`\n${content.trim() || "[MCP Tool finished execution with no output]"}\n\`\`\``;
-        causeMessage.content = content; // Observation content includes the action
-        // Set success based on whether there's an error message
-        causeMessage.success = !observation.payload.content
-          .toLowerCase()
-          .includes("error:");
-      }
-    },
-
-    addErrorMessage(
-      state: SliceState,
-      action: PayloadAction<{ id?: string; message: string }>,
-    ) {
-      const { id, message } = action.payload;
-      state.messages.push({
-        translationID: id,
-        content: message,
-        type: "error",
-        sender: "assistant",
-        timestamp: new Date().toISOString(),
-      });
-    },
-
-    clearMessages(state: SliceState) {
-      state.messages = [];
-      state.systemMessage = null;
-    },
-  },
-});
-
-export const {
-  addUserMessage,
-  addAssistantMessage,
-  addAssistantAction,
-  addAssistantObservation,
-  addErrorMessage,
-  clearMessages,
-} = chatSlice.actions;
-
-// Selectors
-export const selectSystemMessage = (state: { chat: SliceState }) =>
-  state.chat.systemMessage;
-
-export default chatSlice.reducer;
--- a/frontend/src/state/jupyter-slice.ts
+++ b/frontend/src/state/jupyter-slice.ts
@@ -3,6 +3,7 @@ import { createSlice } from "@reduxjs/toolkit";
 export type Cell = {
  content: string;
  type: "input" | "output";
+  imageUrls?: string[];
 };

 const initialCells: Cell[] = [];
@@ -17,7 +18,11 @@ export const jupyterSlice = createSlice({
      state.cells.push({ content: action.payload, type: "input" });
    },
    appendJupyterOutput: (state, action) => {
-      state.cells.push({ content: action.payload, type: "output" });
+      state.cells.push({
+        content: action.payload.content,
+        type: "output",
+        imageUrls: action.payload.imageUrls,
+      });
    },
    clearJupyter: (state) => {
      state.cells = [];
--- a/frontend/src/store.ts
+++ b/frontend/src/store.ts
@@ -1,7 +1,6 @@
 import { combineReducers, configureStore } from "@reduxjs/toolkit";
 import agentReducer from "./state/agent-slice";
 import browserReducer from "./state/browser-slice";
-import chatReducer from "./state/chat-slice";
 import codeReducer from "./state/code-slice";
 import fileStateReducer from "./state/file-state-slice";
 import initialQueryReducer from "./state/initial-query-slice";
@@ -15,7 +14,6 @@ export const rootReducer = combineReducers({
  fileState: fileStateReducer,
  initialQuery: initialQueryReducer,
  browser: browserReducer,
-  chat: chatReducer,
  code: codeReducer,
  cmd: commandReducer,
  agent: agentReducer,
--- a/frontend/src/types/core/base.ts
+++ b/frontend/src/types/core/base.ts
@@ -2,6 +2,7 @@ export type OpenHandsEventType =
  | "message"
  | "system"
  | "agent_state_changed"
+  | "change_agent_state"
  | "run"
  | "read"
  | "write"
@@ -16,11 +17,14 @@ export type OpenHandsEventType =
  | "error"
  | "recall"
  | "mcp"
-  | "call_tool_mcp";
+  | "call_tool_mcp"
+  | "user_rejected";
+
+export type OpenHandsSourceType = "agent" | "user" | "environment";

 interface OpenHandsBaseEvent {
  id: number;
-  source: "agent" | "user";
+  source: OpenHandsSourceType;
  message: string;
  timestamp: string; // ISO 8601
 }
--- a/frontend/src/types/core/guards.ts
+++ b/frontend/src/types/core/guards.ts
@@ -0,0 +1,59 @@
+import { OpenHandsParsedEvent } from ".";
+import {
+  UserMessageAction,
+  AssistantMessageAction,
+  OpenHandsAction,
+  SystemMessageAction,
+} from "./actions";
+import {
+  CommandObservation,
+  ErrorObservation,
+  OpenHandsObservation,
+} from "./observations";
+
+export const isOpenHandsAction = (
+  event: OpenHandsParsedEvent,
+): event is OpenHandsAction => "action" in event;
+
+export const isOpenHandsObservation = (
+  event: OpenHandsParsedEvent,
+): event is OpenHandsObservation => "observation" in event;
+
+export const isUserMessage = (
+  event: OpenHandsParsedEvent,
+): event is UserMessageAction =>
+  isOpenHandsAction(event) &&
+  event.source === "user" &&
+  event.action === "message";
+
+export const isAssistantMessage = (
+  event: OpenHandsParsedEvent,
+): event is AssistantMessageAction =>
+  isOpenHandsAction(event) &&
+  event.source === "agent" &&
+  (event.action === "message" || event.action === "finish");
+
+export const isErrorObservation = (
+  event: OpenHandsParsedEvent,
+): event is ErrorObservation =>
+  isOpenHandsObservation(event) && event.observation === "error";
+
+export const isCommandObservation = (
+  event: OpenHandsParsedEvent,
+): event is CommandObservation =>
+  isOpenHandsObservation(event) && event.observation === "run";
+
+export const isFinishAction = (
+  event: OpenHandsParsedEvent,
+): event is AssistantMessageAction =>
+  isOpenHandsAction(event) && event.action === "finish";
+
+export const isSystemMessage = (
+  event: OpenHandsParsedEvent,
+): event is SystemMessageAction =>
+  isOpenHandsAction(event) && event.action === "system";
+
+export const isRejectObservation = (
+  event: OpenHandsParsedEvent,
+): event is OpenHandsObservation =>
+  isOpenHandsObservation(event) && event.observation === "user_rejected";
--- a/frontend/src/types/core/observations.ts
+++ b/frontend/src/types/core/observations.ts
@@ -23,6 +23,7 @@ export interface IPythonObservation
  source: "agent";
  extras: {
    code: string;
+    image_urls?: string[];
  };
 }

@@ -123,6 +124,7 @@ export interface RecallObservation extends OpenHandsObservationEvent<"recall"> {
    repo_directory?: string;
    repo_instructions?: string;
    runtime_hosts?: Record<string, number>;
+    custom_secrets_descriptions?: Record<string, string>;
    additional_agent_instructions?: string;
    date?: string;
    microagent_knowledge?: MicroagentKnowledge[];
@@ -136,6 +138,14 @@ export interface MCPObservation extends OpenHandsObservationEvent<"mcp"> {
  };
 }

+export interface UserRejectedObservation
+  extends OpenHandsObservationEvent<"user_rejected"> {
+  source: "agent";
+  extras: {
+    // Add any specific fields for MCP observations
+  };
+}
+
 export type OpenHandsObservation =
  | AgentStateChangeObservation
  | AgentThinkObservation
@@ -149,4 +159,5 @@ export type OpenHandsObservation =
  | EditObservation
  | ErrorObservation
  | RecallObservation
-  | MCPObservation;
+  | MCPObservation
+  | UserRejectedObservation;
--- a/frontend/src/utils/parse-cell-content.ts
+++ b/frontend/src/utils/parse-cell-content.ts
@@ -1,26 +1,32 @@
-export type JupyterLine = { type: "plaintext" | "image"; content: string };
+export type JupyterLine = {
+  type: "plaintext" | "image";
+  content: string;
+  url?: string;
+};

-const IMAGE_PREFIX = "![image](data:image/png;base64,";
-
-export const parseCellContent = (content: string) => {
+export const parseCellContent = (content: string, imageUrls?: string[]) => {
  const lines: JupyterLine[] = [];
  let currentText = "";

+  // First, process the text content
  for (const line of content.split("\n")) {
-    if (line.startsWith(IMAGE_PREFIX)) {
-      if (currentText) {
-        lines.push({ type: "plaintext", content: currentText });
-        currentText = ""; // Reset after pushing plaintext
-      }
-      lines.push({ type: "image", content: line });
-    } else {
-      currentText += `${line}\n`;
-    }
+    currentText += `${line}\n`;
  }

  if (currentText) {
    lines.push({ type: "plaintext", content: currentText });
  }

+  // Then, add image lines if we have image URLs
+  if (imageUrls && imageUrls.length > 0) {
+    imageUrls.forEach((url) => {
+      lines.push({
+        type: "image",
+        content: `![image](${url})`,
+        url,
+      });
+    });
+  }
+
  return lines;
 };
--- a/microagents/default-tools.md
+++ b/microagents/default-tools.md
@@ -0,0 +1,15 @@
+---
+# This is a repo microagent that is always activated
+# to include necessary default tools implemented with MCP
+name: default-tools
+type: repo
+version: 1.0.0
+agent: CodeActAgent
+mcp_tools:
+  stdio_servers:
+    - name: "fetch"
+      command: "uvx"
+      args: ["mcp-server-fetch"]
+# We leave the body empty because MCP tools will automatically add the
+# tool description for LLMs in tool calls, so there's no need to add extra descriptions.
+---
--- a/microagents/ssh.md
+++ b/microagents/ssh.md
@@ -0,0 +1,137 @@
+---
+name: SSH Microagent
+type: knowledge
+version: 1.0.0
+agent: CodeActAgent
+triggers:
+  - ssh
+  - remote server
+  - remote machine
+  - remote host
+  - remote connection
+  - secure shell
+  - ssh keys
+---
+
+# SSH Microagent
+
+This microagent provides capabilities for establishing and managing SSH connections to remote machines.
+
+## Capabilities
+
+- Establish SSH connections using password or key-based authentication
+- Generate and manage SSH key pairs
+- Configure SSH for easier connections
+- Execute commands on remote machines
+- Transfer files between local and remote machines
+- Manage SSH configurations and known hosts
+
+## Authentication Methods
+
+### Password Authentication
+
+```bash
+ssh username@hostname
+```
+
+When prompted, you should ask the user for their password or a private key.
+
+### Key-Based Authentication
+
+Generate a new SSH key pair:
+```bash
+ssh-keygen -t ed25519 -f ~/.ssh/key_name -C "comment" -N ""
+```
+
+Copy the public key to the remote server:
+```bash
+ssh-copy-id -i ~/.ssh/key_name.pub username@hostname
+```
+
+Connect using the private key:
+```bash
+ssh -i ~/.ssh/key_name username@hostname
+```
+
+## SSH Configuration
+
+Create or edit the SSH config file for easier connections:
+```bash
+mkdir -p ~/.ssh
+cat > ~/.ssh/config << 'EOF'
+Host alias
+    HostName hostname_or_ip
+    User username
+    IdentityFile ~/.ssh/key_name
+    Port 22
+    ServerAliveInterval 60
+EOF
+chmod 600 ~/.ssh/config
+```
+
+Then connect using the alias:
+```bash
+ssh alias
+```
+
+## Common SSH Options
+
+- `-p PORT`: Connect to a specific port
+- `-X`: Enable X11 forwarding
+- `-L local_port:remote_host:remote_port`: Set up local port forwarding
+- `-R remote_port:local_host:local_port`: Set up remote port forwarding
+- `-N`: Do not execute a remote command (useful for port forwarding)
+- `-f`: Run in background
+- `-v`: Verbose mode (add more v's for increased verbosity)
+
+## File Transfer with SCP
+
+Copy a file to the remote server:
+```bash
+scp /path/to/local/file username@hostname:/path/to/remote/directory/
+```
+
+Copy a file from the remote server:
+```bash
+scp username@hostname:/path/to/remote/file /path/to/local/directory/
+```
+
+Copy a directory recursively:
+```bash
+scp -r /path/to/local/directory username@hostname:/path/to/remote/directory/
+```
+
+## SSH Agent
+
+Start the SSH agent:
+```bash
+eval "$(ssh-agent -s)"
+```
+
+Add a key to the agent:
+```bash
+ssh-add ~/.ssh/key_name
+```
+
+## Troubleshooting
+
+- Check SSH service status on remote: `systemctl status sshd`
+- Verify SSH port is open: `nc -zv hostname 22`
+- Debug connection issues: `ssh -vvv username@hostname`
+- Check permissions: SSH private keys should have 600 permissions (`chmod 600 ~/.ssh/key_name`)
+- Verify known_hosts: If host key changed, remove the old entry with `ssh-keygen -R hostname`
+
+## Secure SSH Key Management
+
+### Local Storage with Proper Permissions
+
+The most basic approach is to ensure proper file permissions:
+
+```bash
+# Set correct permissions for private keys
+chmod 600 ~/.ssh/id_ed25519
+# Set correct permissions for public keys
+chmod 644 ~/.ssh/id_ed25519.pub
+# Set correct permissions for SSH directory
+chmod 700 ~/.ssh
+```
--- a/openhands/init.py
+++ b/openhands/init.py
@@ -22,7 +22,7 @@ def get_version():
        pass

    try:
-        from pkg_resources import DistributionNotFound, get_distribution
+        from pkg_resources import DistributionNotFound, get_distribution  # type: ignore

        return get_distribution(__package_name__).version
    except (ImportError, DistributionNotFound):
--- a/openhands/agenthub/codeact_agent/README.md
+++ b/openhands/agenthub/codeact_agent/README.md
@@ -10,7 +10,7 @@ The CodeAct agent operates through a function calling interface. At each turn, t
 2. **CodeAct**: Execute actions through a set of well-defined tools:
   - Execute Linux `bash` commands with `execute_bash`
   - Run Python code in an [IPython](https://ipython.org/) environment with `execute_ipython_cell`
-   - Interact with web browsers using `browser` and `web_read`
+   - Interact with web browsers using `browser` and `fetch`
   - Edit files using `str_replace_editor` or `edit_file`

 ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -20,7 +20,6 @@ from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
    create_str_replace_editor_tool,
 )
 from openhands.agenthub.codeact_agent.tools.think import ThinkTool
-from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
@@ -123,7 +122,6 @@ class CodeActAgent(Agent):
            if sys.platform == 'win32':
                logger.warning('Windows runtime does not support browsing yet')
            else:
-                tools.append(WebReadTool)
                tools.append(BrowserTool)
        if self.config.enable_jupyter:
            tools.append(IPythonTool)
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -15,7 +15,6 @@ from openhands.agenthub.codeact_agent.tools import (
    IPythonTool,
    LLMBasedFileEditTool,
    ThinkTool,
-    WebReadTool,
    create_cmd_run_tool,
    create_str_replace_editor_tool,
 )
@@ -212,16 +211,6 @@ def response_to_actions(
                    )
                action = BrowseInteractiveAction(browser_actions=arguments['code'])

-            # ================================================
-            # WebReadTool (simplified browsing)
-            # ================================================
-            elif tool_call.function.name == WebReadTool['function']['name']:
-                if 'url' not in arguments:
-                    raise FunctionCallValidationError(
-                        f'Missing required argument "url" in tool call {tool_call.function.name}'
-                    )
-                action = BrowseURLAction(url=arguments['url'])
-
            # ================================================
            # MCPAction (MCP)
            # ================================================
--- a/openhands/agenthub/codeact_agent/prompts/additional_info.j2
+++ b/openhands/agenthub/codeact_agent/prompts/additional_info.j2
@@ -8,7 +8,7 @@ At the user's request, repository {{ repository_info.repo_name }} has been clone
 {{ repository_instructions }}
 </REPOSITORY_INSTRUCTIONS>
 {% endif %}
-{% if runtime_info and (runtime_info.available_hosts or runtime_info.additional_agent_instructions) -%}
+{% if runtime_info -%}
 <RUNTIME_INFORMATION>
 {% if runtime_info.available_hosts %}
 The user has access to the following hosts for accessing a web application,
@@ -24,8 +24,21 @@ For example, if you are using vite.config.js, you should set server.host and ser
 {% if runtime_info.additional_agent_instructions %}
 {{ runtime_info.additional_agent_instructions }}
 {% endif %}
+{% if runtime_info.custom_secrets_descriptions %}
+<CUSTOM_SECRETS>
+You are have access to the following environment variables
+{% for secret_name, secret_description in runtime_info.custom_secrets_descriptions.items() %}
+* $**{{ secret_name }}**: {{ secret_description }}
+{% endfor %}
+</CUSTOM_SECRETS>
+{% endif %}
 {% if runtime_info.date %}
 Today's date is {{ runtime_info.date }} (UTC).
 {% endif %}
 </RUNTIME_INFORMATION>
 {% endif %}
+{% if runtime_info and runtime_info.context_message -%}
+<CONTEXT_MESSAGE>
+{{ runtime_info.context_message }}
+</CONTEXT_MESSAGE>
+{% endif %}
--- a/openhands/agenthub/codeact_agent/tools/init.py
+++ b/openhands/agenthub/codeact_agent/tools/init.py
@@ -5,7 +5,6 @@ from .ipython import IPythonTool
 from .llm_based_edit import LLMBasedFileEditTool
 from .str_replace_editor import create_str_replace_editor_tool
 from .think import ThinkTool
-from .web_read import WebReadTool

 __all__ = [
    'BrowserTool',
@@ -14,6 +13,5 @@ __all__ = [
    'IPythonTool',
    'LLMBasedFileEditTool',
    'create_str_replace_editor_tool',
-    'WebReadTool',
    'ThinkTool',
 ]
--- a/openhands/agenthub/codeact_agent/tools/bash.py
+++ b/openhands/agenthub/codeact_agent/tools/bash.py
@@ -2,6 +2,8 @@ import sys

 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

+from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME
+
 _DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.

 ### Command Execution
@@ -45,7 +47,7 @@ def create_cmd_run_tool(
    return ChatCompletionToolParam(
        type='function',
        function=ChatCompletionToolParamFunctionChunk(
-            name=refine_prompt('execute_bash'),
+            name=EXECUTE_BASH_TOOL_NAME,
            description=refine_prompt(description),
            parameters={
                'type': 'object',
--- a/openhands/agenthub/codeact_agent/tools/browser.py
+++ b/openhands/agenthub/codeact_agent/tools/browser.py
@@ -1,6 +1,8 @@
 from browsergym.core.action.highlevel import HighLevelActionSet
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

+from openhands.llm.tool_names import BROWSER_TOOL_NAME
+
 # from browsergym/core/action/highlevel.py
 _browser_action_space = HighLevelActionSet(
    subsets=['bid', 'nav'],
@@ -141,7 +143,7 @@ for _, action in _browser_action_space.action_set.items():
 BrowserTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
-        name='browser',
+        name=BROWSER_TOOL_NAME,
        description=_BROWSER_DESCRIPTION,
        parameters={
            'type': 'object',
--- a/openhands/agenthub/codeact_agent/tools/finish.py
+++ b/openhands/agenthub/codeact_agent/tools/finish.py
@@ -1,5 +1,7 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

+from openhands.llm.tool_names import FINISH_TOOL_NAME
+
 _FINISH_DESCRIPTION = """Signals the completion of the current task or conversation.

 Use this tool when:
@@ -18,7 +20,7 @@ The task_completed field should be set to True if you believed you have complete
 FinishTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
-        name='finish',
+        name=FINISH_TOOL_NAME,
        description=_FINISH_DESCRIPTION,
        parameters={
            'type': 'object',
--- a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
+++ b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
@@ -1,5 +1,7 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

+from openhands.llm.tool_names import STR_REPLACE_EDITOR_TOOL_NAME
+
 _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
 * State is persistent across command calls and discussions with the user
 * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
@@ -55,7 +57,7 @@ def create_str_replace_editor_tool(
    return ChatCompletionToolParam(
        type='function',
        function=ChatCompletionToolParamFunctionChunk(
-            name='str_replace_editor',
+            name=STR_REPLACE_EDITOR_TOOL_NAME,
            description=description,
            parameters={
                'type': 'object',
--- a/openhands/agenthub/codeact_agent/tools/web_read.py
+++ b/openhands/agenthub/codeact_agent/tools/web_read.py
@@ -1,26 +0,0 @@
-from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
-
-_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `web_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.) OR read a webpage that contains images.
-
-You may use the `web_read` tool to read text content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
-
-Only the most recently read webpage will be available to read. This means you should not follow a link to a new page until you are done with the information on the current page.
-"""
-
-WebReadTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='web_read',
-        description=_WEB_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'url': {
-                    'type': 'string',
-                    'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
-                }
-            },
-            'required': ['url'],
-        },
-    ),
-)
--- a/openhands/agenthub/readonly_agent/function_calling.py
+++ b/openhands/agenthub/readonly_agent/function_calling.py
@@ -17,7 +17,6 @@ from openhands.agenthub.codeact_agent.function_calling import (
 from openhands.agenthub.codeact_agent.tools import (
    FinishTool,
    ThinkTool,
-    WebReadTool,
 )
 from openhands.agenthub.readonly_agent.tools import (
    GlobTool,
@@ -191,16 +190,6 @@ def response_to_actions(
                glob_cmd = glob_to_cmdrun(pattern, path)
                action = CmdRunAction(command=glob_cmd, is_input=False)

-            # ================================================
-            # WebReadTool (simplified browsing)
-            # ================================================
-            elif tool_call.function.name == WebReadTool['function']['name']:
-                if 'url' not in arguments:
-                    raise FunctionCallValidationError(
-                        f'Missing required argument "url" in tool call {tool_call.function.name}'
-                    )
-                action = BrowseURLAction(url=arguments['url'])
-
            # ================================================
            # MCPAction (MCP)
            # ================================================
@@ -249,7 +238,6 @@ def get_tools() -> list[ChatCompletionToolParam]:
    return [
        ThinkTool,
        FinishTool,
-        WebReadTool,
        GrepTool,
        GlobTool,
        ViewTool,
--- a/openhands/cli/main.py
+++ b/openhands/cli/main.py
@@ -1,7 +1,6 @@
 import asyncio
 import logging
 import sys
-from uuid import uuid4

 from prompt_toolkit.shortcuts import clear

@@ -42,6 +41,7 @@ from openhands.core.setup import (
    create_controller,
    create_memory,
    create_runtime,
+    generate_sid,
    initialize_repository_for_runtime,
 )
 from openhands.events import EventSource, EventStreamSubscriber
@@ -81,6 +81,16 @@ async def cleanup_session(
        if pending:
            await asyncio.wait(pending, timeout=5.0)

+        event_stream = runtime.event_stream
+
+        # Save the final state
+        end_state = controller.get_state()
+        end_state.save_to_session(
+            event_stream.sid,
+            event_stream.file_store,
+            event_stream.user_id,
+        )
+
        # Reset agent, close runtime and controller
        agent.reset()
        runtime.close()
@@ -94,12 +104,13 @@ async def run_session(
    config: AppConfig,
    settings_store: FileSettingsStore,
    current_dir: str,
-    initial_user_action: str | None = None,
+    task_content: str | None = None,
+    session_name: str | None = None,
 ) -> bool:
    reload_microagents = False
    new_session_requested = False

-    sid = str(uuid4())
+    sid = generate_sid(config, session_name)
    is_loaded = asyncio.Event()
    is_paused = asyncio.Event()  # Event to track agent pause requests
    always_confirm_mode = False  # Flag to enable always confirm mode
@@ -120,7 +131,7 @@ async def run_session(
        agent=agent,
    )

-    controller, _ = create_controller(agent, runtime, config)
+    controller, initial_state = create_controller(agent, runtime, config)

    event_stream = runtime.event_stream

@@ -218,10 +229,9 @@ async def run_session(
    def on_event(event: Event) -> None:
        loop.create_task(on_event_async(event))

-    event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, str(uuid4()))
+    event_stream.subscribe(EventStreamSubscriber.MAIN, on_event, sid)

    await runtime.connect()
-    await add_mcp_tools_to_agent(agent, runtime, config.mcp)

    # Initialize repository if needed
    repo_directory = None
@@ -240,6 +250,10 @@ async def run_session(
        repo_directory=repo_directory,
    )

+    # Add MCP tools to the agent
+    if agent.config.enable_mcp:
+        await add_mcp_tools_to_agent(agent, runtime, memory, config.mcp)
+
    # Clear loading animation
    is_loaded.set()

@@ -249,17 +263,38 @@ async def run_session(
    # Show OpenHands banner and session ID
    display_banner(session_id=sid)

-    # Show OpenHands welcome
-    display_welcome_message()
+    welcome_message = 'What do you want to build?'  # from the application
+    initial_message = ''  # from the user

-    if initial_user_action:
-        # If there's an initial user action, enqueue it and do not prompt again
-        display_initial_user_prompt(initial_user_action)
-        event_stream.add_event(
-            MessageAction(content=initial_user_action), EventSource.USER
-        )
+    if task_content:
+        initial_message = task_content
+
+    # If we loaded a state, we are resuming a previous session
+    if initial_state is not None:
+        logger.info(f'Resuming session: {sid}')
+
+        if initial_state.last_error:
+            # If the last session ended in an error, provide a message.
+            initial_message = (
+                'NOTE: the last session ended with an error.'
+                "Let's get back on track. Do NOT resume your task. Ask me about it."
+            )
+        else:
+            # If we are resuming, we already have a task
+            initial_message = ''
+            welcome_message += '\nLoading previous conversation.'
+
+    # Show OpenHands welcome
+    display_welcome_message(welcome_message)
+
+    # The prompt_for_next_task will be triggered if the agent enters AWAITING_USER_INPUT.
+    # If the restored state is already AWAITING_USER_INPUT, on_event_async will handle it.
+
+    if initial_message:
+        display_initial_user_prompt(initial_message)
+        event_stream.add_event(MessageAction(content=initial_message), EventSource.USER)
    else:
-        # Otherwise prompt for the user's first message right away
+        # No session restored, no initial action: prompt for the user's first message
        asyncio.create_task(prompt_for_next_task(''))

    await run_agent_until_done(
@@ -334,7 +369,12 @@ async def main(loop: asyncio.AbstractEventLoop) -> None:

    # Run the first session
    new_session_requested = await run_session(
-        loop, config, settings_store, current_dir, task_str
+        loop,
+        config,
+        settings_store,
+        current_dir,
+        task_str,
+        session_name=args.name,
    )

    # If a new session was requested, run it
--- a/openhands/cli/tui.py
+++ b/openhands/cli/tui.py
@@ -145,14 +145,20 @@ def display_banner(session_id: str) -> None:
    print_formatted_text('')


-def display_welcome_message() -> None:
+def display_welcome_message(message: str = '') -> None:
    print_formatted_text(
        HTML("<gold>Let's start building!</gold>\n"), style=DEFAULT_STYLE
    )
-    print_formatted_text(
-        HTML('What do you want to build? <grey>Type /help for help</grey>'),
-        style=DEFAULT_STYLE,
-    )
+    if message:
+        print_formatted_text(
+            HTML(f'{message} <grey>Type /help for help</grey>'),
+            style=DEFAULT_STYLE,
+        )
+    else:
+        print_formatted_text(
+            HTML('What do you want to build? <grey>Type /help for help</grey>'),
+            style=DEFAULT_STYLE,
+        )


 def display_initial_user_prompt(prompt: str) -> None:
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -8,7 +8,6 @@ from enum import Enum
 from typing import Any

 import openhands
-from openhands.controller.state.task import RootTask
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.schema import AgentState
 from openhands.events.action import (
@@ -75,7 +74,6 @@ class State:
      - additional task-specific data
    """

-    root_task: RootTask = field(default_factory=RootTask)
    session_id: str = ''
    # global iteration for the current task
    iteration: int = 0
--- a/openhands/controller/state/task.py
+++ b/openhands/controller/state/task.py
@@ -1,234 +0,0 @@
-from __future__ import annotations
-
-from openhands.core.exceptions import (
-    LLMMalformedActionError,
-    TaskInvalidStateError,
-)
-from openhands.core.logger import openhands_logger as logger
-
-OPEN_STATE = 'open'
-COMPLETED_STATE = 'completed'
-ABANDONED_STATE = 'abandoned'
-IN_PROGRESS_STATE = 'in_progress'
-VERIFIED_STATE = 'verified'
-STATES = [
-    OPEN_STATE,
-    COMPLETED_STATE,
-    ABANDONED_STATE,
-    IN_PROGRESS_STATE,
-    VERIFIED_STATE,
-]
-
-
-class Task:
-    id: str
-    goal: str
-    parent: 'Task' | None
-    subtasks: list['Task']
-
-    def __init__(
-        self,
-        parent: 'Task',
-        goal: str,
-        state: str = OPEN_STATE,
-        subtasks: list[dict | 'Task'] | None = None,  # noqa: B006
-    ) -> None:
-        """Initializes a new instance of the Task class.
-
-        Args:
-            parent: The parent task, or None if it is the root task.
-            goal: The goal of the task.
-            state: The initial state of the task.
-            subtasks: A list of subtasks associated with this task.
-        """
-        if subtasks is None:
-            subtasks = []
-        if parent.id:
-            self.id = parent.id + '.' + str(len(parent.subtasks))
-        else:
-            self.id = str(len(parent.subtasks))
-        self.parent = parent
-        self.goal = goal
-        logger.debug(f'Creating task {self.id} with parent={parent.id}, goal={goal}')
-        self.subtasks = []
-        for subtask in subtasks or []:
-            if isinstance(subtask, Task):
-                self.subtasks.append(subtask)
-            else:
-                goal = str(subtask.get('goal', ''))
-                state = str(subtask.get('state', OPEN_STATE))
-                subtasks = subtask.get('subtasks')
-                logger.debug(f'Reading: {goal}, {state}, {subtasks}')
-                self.subtasks.append(Task(self, goal, state, subtasks))
-
-        self.state = OPEN_STATE
-
-    def to_string(self, indent: str = '') -> str:
-        """Returns a string representation of the task and its subtasks.
-
-        Args:
-            indent: The indentation string for formatting the output.
-
-        Returns:
-            A string representation of the task and its subtasks.
-        """
-        emoji = ''
-        if self.state == VERIFIED_STATE:
-            emoji = '✅'
-        elif self.state == COMPLETED_STATE:
-            emoji = '🟢'
-        elif self.state == ABANDONED_STATE:
-            emoji = '❌'
-        elif self.state == IN_PROGRESS_STATE:
-            emoji = '💪'
-        elif self.state == OPEN_STATE:
-            emoji = '🔵'
-        result = indent + emoji + ' ' + self.id + ' ' + self.goal + '\n'
-        for subtask in self.subtasks:
-            result += subtask.to_string(indent + '    ')
-        return result
-
-    def to_dict(self) -> dict:
-        """Returns a dictionary representation of the task.
-
-        Returns:
-            A dictionary containing the task's attributes.
-        """
-        return {
-            'id': self.id,
-            'goal': self.goal,
-            'state': self.state,
-            'subtasks': [t.to_dict() for t in self.subtasks],
-        }
-
-    def set_state(self, state: str) -> None:
-        """Sets the state of the task and its subtasks.
-
-        Args:
-            state: The new state of the task.
-
-        Raises:
-            TaskInvalidStateError: If the provided state is invalid.
-        """
-        if state not in STATES:
-            logger.error('Invalid state: %s', state)
-            raise TaskInvalidStateError(state)
-        self.state = state
-        if (
-            state == COMPLETED_STATE
-            or state == ABANDONED_STATE
-            or state == VERIFIED_STATE
-        ):
-            for subtask in self.subtasks:
-                if subtask.state != ABANDONED_STATE:
-                    subtask.set_state(state)
-        elif state == IN_PROGRESS_STATE:
-            if self.parent is not None:
-                self.parent.set_state(state)
-
-    def get_current_task(self) -> 'Task' | None:
-        """Retrieves the current task in progress.
-
-        Returns:
-            The current task in progress, or None if no task is in progress.
-        """
-        for subtask in self.subtasks:
-            if subtask.state == IN_PROGRESS_STATE:
-                return subtask.get_current_task()
-        if self.state == IN_PROGRESS_STATE:
-            return self
-        return None
-
-
-class RootTask(Task):
-    """Serves as the root node in a tree of tasks.
-    Because we want the top-level of the root_task to be a list of tasks (1, 2, 3, etc.),
-    the "root node" of the data structure is kind of invisible--it just
-    holds references to the top-level tasks.
-
-    Attributes:
-        id: Kept blank for root_task
-        goal: Kept blank for root_task
-        parent: None for root_task
-        subtasks: The top-level list of tasks associated with the root_task.
-        state: The state of the root_task.
-    """
-
-    id: str = ''
-    goal: str = ''
-    parent: None = None
-
-    def __init__(self) -> None:
-        self.subtasks = []
-        self.state = OPEN_STATE
-
-    def __str__(self) -> str:
-        """Returns a string representation of the root_task.
-
-        Returns:
-            A string representation of the root_task.
-        """
-        return self.to_string()
-
-    def get_task_by_id(self, id: str) -> Task:
-        """Retrieves a task by its ID.
-
-        Args:
-            id: The ID of the task.
-
-        Returns:
-            The task with the specified ID.
-
-        Raises:
-            AgentMalformedActionError: If the provided task ID is invalid or does not exist.
-        """
-        if id == '':
-            return self
-        if len(self.subtasks) == 0:
-            raise LLMMalformedActionError('Task does not exist:' + id)
-        try:
-            parts = [int(p) for p in id.split('.')]
-        except ValueError:
-            raise LLMMalformedActionError('Invalid task id:' + id)
-        task: Task = self
-        for part in parts:
-            if part >= len(task.subtasks):
-                raise LLMMalformedActionError('Task does not exist:' + id)
-            task = task.subtasks[part]
-        return task
-
-    def add_subtask(
-        self,
-        parent_id: str,
-        goal: str,
-        subtasks: list[dict | Task] | None = None,
-    ) -> None:
-        """Adds a subtask to a parent task.
-
-        Args:
-            parent_id: The ID of the parent task.
-            goal: The goal of the subtask.
-            subtasks: A list of subtasks associated with the new subtask.
-        """
-        subtasks = subtasks or []
-        parent = self.get_task_by_id(parent_id)
-        child = Task(parent=parent, goal=goal, subtasks=subtasks)
-        parent.subtasks.append(child)
-
-    def set_subtask_state(self, id: str, state: str) -> None:
-        """Sets the state of a subtask.
-
-        Args:
-            id: The ID of the subtask.
-            state: The new state of the subtask.
-        """
-        task = self.get_task_by_id(id)
-        logger.debug('Setting task {task.id} from state {task.state} to {state}')
-        task.set_state(state)
-        unfinished_tasks = [
-            t
-            for t in self.subtasks
-            if t.state not in [COMPLETED_STATE, VERIFIED_STATE, ABANDONED_STATE]
-        ]
-        if len(unfinished_tasks) == 0:
-            self.set_state(COMPLETED_STATE)
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -28,6 +28,8 @@ class AgentConfig(BaseModel):
    """Whether to enable finish tool"""
    enable_prompt_extensions: bool = Field(default=True)
    """Whether to enable prompt extensions"""
+    enable_mcp: bool = Field(default=True)
+    """Whether to enable MCP tools"""
    disabled_microagents: list[str] = Field(default_factory=list)
    """A list of microagents to disable (by name, without .py extension, e.g. ["github", "lint"]). Default is None."""
    enable_history_truncation: bool = Field(default=True)
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -116,8 +116,6 @@ async def run_controller(
                selected_repository=config.sandbox.selected_repo,
            )

-    await add_mcp_tools_to_agent(agent, runtime, config.mcp)
-
    event_stream = runtime.event_stream

    # when memory is created, it will load the microagents from the selected repository
@@ -130,6 +128,10 @@ async def run_controller(
            repo_directory=repo_directory,
        )

+    # Add MCP tools to the agent
+    if agent.config.enable_mcp:
+        await add_mcp_tools_to_agent(agent, runtime, memory, config.mcp)
+
    replay_events: list[Event] | None = None
    if config.replay_trajectory_path:
        logger.info('Trajectory replay is enabled')
--- a/openhands/core/setup.py
+++ b/openhands/core/setup.py
@@ -154,7 +154,7 @@ def create_memory(

    if runtime:
        # sets available hosts
-        memory.set_runtime_info(runtime)
+        memory.set_contextual_info(runtime, {})

        # loads microagents from repo/.openhands/microagents
        microagents: list[BaseMicroagent] = runtime.get_microagents_from_selected_repo(
--- a/openhands/events/event_store.py
+++ b/openhands/events/event_store.py
@@ -78,6 +78,7 @@ class EventStore(EventStoreABC):
        end_id: int | None = None,
        reverse: bool = False,
        filter: EventFilter | None = None,
+        limit: int | None = None,
    ) -> Iterable[Event]:
        """
        Retrieve events from the event stream, optionally filtering out events of a given type
@@ -107,6 +108,7 @@ class EventStore(EventStoreABC):
            step = 1

        cache_page = _DUMMY_PAGE
+        num_results = 0
        for index in range(start_id, end_id, step):
            if not should_continue():
                return
@@ -121,6 +123,9 @@ class EventStore(EventStoreABC):
            if event:
                if not filter or filter.include(event):
                    yield event
+                    num_results += 1
+                    if limit and limit <= num_results:
+                        return

    def get_event(self, id: int) -> Event:
        filename = self._get_filename_for_id(id, self.user_id)
--- a/openhands/events/event_store_abc.py
+++ b/openhands/events/event_store_abc.py
@@ -23,6 +23,7 @@ class EventStoreABC:
        end_id: int | None = None,
        reverse: bool = False,
        filter: EventFilter | None = None,
+        limit: int | None = None,
    ) -> Iterable[Event]:
        """
        Retrieve events from the event stream, optionally excluding events using a filter
--- a/openhands/events/observation/agent.py
+++ b/openhands/events/observation/agent.py
@@ -74,6 +74,8 @@ class RecallObservation(Observation):
    runtime_hosts: dict[str, int] = field(default_factory=dict)
    additional_agent_instructions: str = ''
    date: str = ''
+    custom_secrets_descriptions: dict[str, str] = field(default_factory=dict)
+    context_message: str | None = None

    # knowledge
    microagent_knowledge: list[MicroagentKnowledge] = field(default_factory=list)
@@ -114,7 +116,8 @@ class RecallObservation(Observation):
                    f'repo_instructions={self.repo_instructions[:20]}...',
                    f'runtime_hosts={self.runtime_hosts}',
                    f'additional_agent_instructions={self.additional_agent_instructions[:20]}...',
-                    f'date={self.date}',
+                    f'date={self.date}'
+                    f'custom_secrets_descriptions={self.custom_secrets_descriptions}',
                ]
            )
        else:
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@@ -170,6 +170,7 @@ class IPythonRunCellObservation(Observation):

    code: str
    observation: str = ObservationType.RUN_IPYTHON
+    image_urls: list[str] | None = None

    @property
    def error(self) -> bool:
@@ -184,4 +185,7 @@ class IPythonRunCellObservation(Observation):
        return True  # IPython cells are always considered successful

    def __str__(self) -> str:
-        return f'**IPythonRunCellObservation**\n{self.content}'
+        result = f'**IPythonRunCellObservation**\n{self.content}'
+        if self.image_urls:
+            result += f'\nImages: {len(self.image_urls)}'
+        return result
--- a/openhands/llm/fn_call_converter.py
+++ b/openhands/llm/fn_call_converter.py
@@ -18,6 +18,12 @@ from openhands.core.exceptions import (
    FunctionCallConversionError,
    FunctionCallValidationError,
 )
+from openhands.llm.tool_names import (
+    BROWSER_TOOL_NAME,
+    EXECUTE_BASH_TOOL_NAME,
+    FINISH_TOOL_NAME,
+    STR_REPLACE_EDITOR_TOOL_NAME,
+)

 # Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
 SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
@@ -55,14 +61,12 @@ def refine_prompt(prompt: str) -> str:
    return prompt


-# NOTE: we need to make sure this example is always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py
-IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = refine_prompt("""
-Here's a running example of how to perform a task with the provided tools.
-
--------------------- START OF EXAMPLE ---------------------
-
-USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
+# NOTE: we need to make sure these examples are always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py

+# Example snippets for each tool
+TOOL_EXAMPLES = {
+    'execute_bash': {
+        'check_dir': """
 ASSISTANT: Sure! Let me first check the current directory:
 <function=execute_bash>
 <parameter=command>
@@ -73,29 +77,10 @@ pwd && ls
 USER: EXECUTION RESULT of [execute_bash]:
 /workspace
 openhands@runtime:~/workspace$
-
-ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
-<function=str_replace_editor>
-<parameter=command>create</parameter>
-<parameter=path>/workspace/app.py</parameter>
-<parameter=file_text>
-from flask import Flask
-app = Flask(__name__)
-
-@app.route('/')
-def index() -> str:
-    numbers = list(range(1, 11))
-    return str(numbers)
-
-if __name__ == '__main__':
-    app.run(port=5000)
-</parameter>
-</function>
-
-USER: EXECUTION RESULT of [str_replace_editor]:
-File created successfully at: /workspace/app.py
-
-ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
+""",
+        'run_server': """
+ASSISTANT:
+Let me run the Python file for you:
 <function=execute_bash>
 <parameter=command>
 python3 app.py > server.log 2>&1 &
@@ -106,7 +91,8 @@ USER: EXECUTION RESULT of [execute_bash]:
 [1] 121
 [1]+  Exit 1                  python3 app.py > server.log 2>&1

-ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
+ASSISTANT:
+Looks like the server was running with PID 121 then crashed. Let me check the server log:
 <function=execute_bash>
 <parameter=command>
 cat server.log
@@ -119,7 +105,8 @@ Traceback (most recent call last):
    from flask import Flask
 ModuleNotFoundError: No module named 'flask'

-ASSISTANT: Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
+ASSISTANT:
+Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
 <function=execute_bash>
 <parameter=command>
 pip3 install flask
@@ -143,7 +130,8 @@ Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/pyt
 Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
 Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0

-ASSISTANT: The `flask` module is installed successfully. Let me run the Python file again:
+ASSISTANT:
+The `flask` module is installed successfully. Let me run the Python file again:
 <function=execute_bash>
 <parameter=command>
 python3 app.py > server.log 2>&1 &
@@ -170,7 +158,8 @@ Press CTRL+C to quit

 ASSISTANT:
 The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
-
+""",
+        'kill_server': """
 USER: Now kill the server, make it display the numbers in a table format.

 ASSISTANT:
@@ -183,7 +172,48 @@ kill 124

 USER: EXECUTION RESULT of [execute_bash]:
 [1]+  Terminated              python3 app.py > server.log 2>&1
+""",
+        'run_server_again': """
+ASSISTANT:
+Running the updated file:
+<function=execute_bash>
+<parameter=command>
+python3 app.py > server.log 2>&1 &
+</parameter>
+</function>

+USER: EXECUTION RESULT of [execute_bash]:
+[1] 126
+
+ASSISTANT:
+The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000.
+""",
+    },
+    'str_replace_editor': {
+        'create_file': """
+ASSISTANT:
+There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
+<function=str_replace_editor>
+<parameter=command>create</parameter>
+<parameter=path>/workspace/app.py</parameter>
+<parameter=file_text>
+from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)
+</parameter>
+</function>
+
+USER: EXECUTION RESULT of [str_replace_editor]:
+File created successfully at: /workspace/app.py
+""",
+        'edit_file': """
 ASSISTANT:
 Now let me display the numbers in a table format:
 <function=str_replace_editor>
@@ -205,28 +235,100 @@ The file /workspace/app.py has been edited. Here's the result of running `cat -n
    10      app.run(port=5000)
 Review the changes and make sure they are as expected. Edit the file again if necessary.

+""",
+    },
+    'browser': {
+        'view_page': """
 ASSISTANT:
-Running the updated file:
-<function=execute_bash>
-<parameter=command>
-python3 app.py > server.log 2>&1 &
+Let me check how the page looks in the browser:
+<function=browser>
+<parameter=code>
+goto('http://127.0.0.1:5000')
+noop(1000)  # Wait for page to load
 </parameter>
 </function>

-USER: EXECUTION RESULT of [execute_bash]:
-[1] 126
-
+USER: EXECUTION RESULT of [browser]:
+[Browser shows the numbers in a table format]
+"""
+    },
+    'finish': {
+        'task_completed': """
 ASSISTANT:
 The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
 <function=finish>
+<parameter=message>The task has been completed. The web server is running and displaying numbers 1-10 in a table format at http://127.0.0.1:5000.</parameter>
+<parameter=task_completed>true</parameter>
 </function>
+"""
+    },
+}

+
+def get_example_for_tools(tools: list[dict]) -> str:
+    """Generate an in-context learning example based on available tools."""
+    available_tools = set()
+    for tool in tools:
+        if tool['type'] == 'function':
+            name = tool['function']['name']
+            if name == EXECUTE_BASH_TOOL_NAME:
+                available_tools.add('execute_bash')
+            elif name == STR_REPLACE_EDITOR_TOOL_NAME:
+                available_tools.add('str_replace_editor')
+            elif name == BROWSER_TOOL_NAME:
+                available_tools.add('browser')
+            elif name == FINISH_TOOL_NAME:
+                available_tools.add('finish')
+
+    if not available_tools:
+        return ''
+
+    example = """Here's a running example of how to perform a task with the provided tools.
+
+--------------------- START OF EXAMPLE ---------------------
+
+USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
+
+"""
+
+    # Build example based on available tools
+    if 'execute_bash' in available_tools:
+        example += TOOL_EXAMPLES['execute_bash']['check_dir']
+
+    if 'str_replace_editor' in available_tools:
+        example += TOOL_EXAMPLES['str_replace_editor']['create_file']
+
+    if 'execute_bash' in available_tools:
+        example += TOOL_EXAMPLES['execute_bash']['run_server']
+
+    if 'browser' in available_tools:
+        example += TOOL_EXAMPLES['browser']['view_page']
+
+    if 'execute_bash' in available_tools:
+        example += TOOL_EXAMPLES['execute_bash']['kill_server']
+
+    if 'str_replace_editor' in available_tools:
+        example += TOOL_EXAMPLES['str_replace_editor']['edit_file']
+
+    if 'execute_bash' in available_tools:
+        example += TOOL_EXAMPLES['execute_bash']['run_server_again']
+
+    if 'finish' in available_tools:
+        example += TOOL_EXAMPLES['finish']['task_completed']
+
+    example += """
 --------------------- END OF EXAMPLE ---------------------

 Do NOT assume the environment is the same as in the example above.

 --------------------- NEW TASK DESCRIPTION ---------------------
-""").lstrip()
+"""
+    example = example.lstrip()
+
+    return example
+
+
+IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools

 IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX = """
 --------------------- END OF NEW TASK DESCRIPTION ---------------------
@@ -352,74 +454,42 @@ def convert_fncall_messages_to_non_fncall_messages(
            # Add in-context learning example for the first user message
            if not first_user_message_encountered and add_in_context_learning_example:
                first_user_message_encountered = True
-                # Check tools
-                if not (
-                    tools
-                    and len(tools) > 0
-                    and any(
-                        (
-                            tool['type'] == 'function'
-                            and tool['function']['name']
-                            == refine_prompt('execute_bash')
-                            and 'command'
-                            in tool['function']['parameters']['properties']
-                        )
-                        for tool in tools
-                    )
-                    and any(
-                        (
-                            tool['type'] == 'function'
-                            and tool['function']['name'] == 'str_replace_editor'
-                            and 'path' in tool['function']['parameters']['properties']
-                            and 'file_text'
-                            in tool['function']['parameters']['properties']
-                            and 'old_str'
-                            in tool['function']['parameters']['properties']
-                            and 'new_str'
-                            in tool['function']['parameters']['properties']
-                        )
-                        for tool in tools
-                    )
-                ):
-                    raise FunctionCallConversionError(
-                        'The currently provided tool set are NOT compatible with the in-context learning example for FnCall to Non-FnCall conversion. '
-                        'Please update your tool set OR the in-context learning example in openhands/llm/fn_call_converter.py'
-                    )

-                # add in-context learning example
-                if isinstance(content, str):
-                    content = (
-                        IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
-                        + content
-                        + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
-                    )
-                elif isinstance(content, list):
-                    if content and content[0]['type'] == 'text':
-                        content[0]['text'] = (
-                            IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
-                            + content[0]['text']
-                            + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
-                        )
+                # Generate example based on available tools
+                example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
+
+                # Add example if we have any tools
+                if example:
+                    # add in-context learning example
+                    if isinstance(content, str):
+                        content = example + content + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
+                    elif isinstance(content, list):
+                        if content and content[0]['type'] == 'text':
+                            content[0]['text'] = (
+                                example
+                                + content[0]['text']
+                                + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
+                            )
+                        else:
+                            content = (
+                                [
+                                    {
+                                        'type': 'text',
+                                        'text': example,
+                                    }
+                                ]
+                                + content
+                                + [
+                                    {
+                                        'type': 'text',
+                                        'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
+                                    }
+                                ]
+                            )
                    else:
-                        content = (
-                            [
-                                {
-                                    'type': 'text',
-                                    'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX,
-                                }
-                            ]
-                            + content
-                            + [
-                                {
-                                    'type': 'text',
-                                    'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
-                                }
-                            ]
+                        raise FunctionCallConversionError(
+                            f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
                        )
-                else:
-                    raise FunctionCallConversionError(
-                        f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
-                    )
            converted_messages.append(
                {
                    'role': 'user',
@@ -604,17 +674,28 @@ def convert_non_fncall_messages_to_fncall_messages(
            if not first_user_message_encountered:
                first_user_message_encountered = True
                if isinstance(content, str):
-                    content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, '')
-                    content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '')
+                    # Remove any existing example
+                    if content.startswith(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)):
+                        content = content.replace(
+                            IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools), '', 1
+                        )
+                    if content.endswith(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX):
+                        content = content.replace(
+                            IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
+                        )
                elif isinstance(content, list):
                    for item in content:
                        if item['type'] == 'text':
-                            item['text'] = item['text'].replace(
-                                IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, ''
-                            )
-                            item['text'] = item['text'].replace(
-                                IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, ''
-                            )
+                            # Remove any existing example
+                            example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
+                            if item['text'].startswith(example):
+                                item['text'] = item['text'].replace(example, '', 1)
+                            if item['text'].endswith(
+                                IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
+                            ):
+                                item['text'] = item['text'].replace(
+                                    IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
+                                )
                else:
                    raise FunctionCallConversionError(
                        f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
--- a/openhands/llm/tool_names.py
+++ b/openhands/llm/tool_names.py
@@ -0,0 +1,6 @@
+"""Constants for tool names used in function calling."""
+
+EXECUTE_BASH_TOOL_NAME = 'execute_bash'
+STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
+BROWSER_TOOL_NAME = 'browser'
+FINISH_TOOL_NAME = 'finish'
--- a/openhands/mcp/utils.py
+++ b/openhands/mcp/utils.py
@@ -10,6 +10,7 @@ from openhands.events.action.mcp import MCPAction
 from openhands.events.observation.mcp import MCPObservation
 from openhands.events.observation.observation import Observation
 from openhands.mcp.client import MCPClient
+from openhands.memory.memory import Memory
 from openhands.runtime.base import Runtime


@@ -149,7 +150,7 @@ async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Obse


 async def add_mcp_tools_to_agent(
-    agent: 'Agent', runtime: Runtime, mcp_config: MCPConfig
+    agent: 'Agent', runtime: Runtime, memory: 'Memory', mcp_config: MCPConfig
 ):
    """
    Add MCP tools to an agent.
@@ -165,8 +166,25 @@ async def add_mcp_tools_to_agent(
        'Runtime must be initialized before adding MCP tools'
    )

+    # Add microagent MCP tools if available
+    microagent_mcp_configs = memory.get_microagent_mcp_tools()
+    extra_stdio_servers = []
+    for mcp_config in microagent_mcp_configs:
+        if mcp_config.sse_servers:
+            logger.warning(
+                'Microagent MCP config contains SSE servers, it is not yet supported.'
+            )
+
+        if mcp_config.stdio_servers:
+            for stdio_server in mcp_config.stdio_servers:
+                # Check if this stdio server is already in the config
+                if stdio_server not in extra_stdio_servers:
+                    extra_stdio_servers.append(stdio_server)
+                    logger.info(f'Added microagent stdio server: {stdio_server.name}')
+
    # Add the runtime as another MCP server
-    updated_mcp_config = runtime.get_updated_mcp_config()
+    updated_mcp_config = runtime.get_updated_mcp_config(extra_stdio_servers)
+
    # Fetch the MCP tools
    mcp_tools = await fetch_mcp_tools_from_config(updated_mcp_config)

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
openhands	7561518e4c	Fix lint errors	2025-05-19 16:55:19 +00:00
openhands	2cd503f033	Fix merge conflicts with main branch	2025-05-19 16:53:24 +00:00
dependabot[bot]	470687f826	chore(deps): bump the mcp-packages group with 2 updates (#8546 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-05-19 18:37:11 +02:00
tofarr	38b4d93237	Add Session API Key Authentication for Runtime Communication (#8550 )	2025-05-19 09:59:22 -06:00
dependabot[bot]	872b97a3c8	chore(deps): bump the version-all group across 1 directory with 20 updates (#8545 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2025-05-19 15:49:53 +00:00
sp.wack	14334040f1	chore(frontend): Refactor chat interface-related event handling (#8403 )	2025-05-19 15:15:09 +00:00
sp.wack	b244138ec5	fix(frontend): Prevent making too many calls to `/git/changes` on conversation load (#8579 )	2025-05-19 18:57:18 +04:00
Xingyao Wang	4a3d2e6859	Fix #8551 : Show images produced in Jupyter Notebook to LLM directly (#8552 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-19 14:14:00 +00:00
luolin101	1a3cb16ba6	add Visual SWE-bench benchmark (#7131 ) Co-authored-by: tsukimi <yuailun@pku.edu.cn> Co-authored-by: Ryan H. Tran <descience.thh10@gmail.com>	2025-05-19 12:08:46 +07:00
Xingyao Wang	2ecc39ffcc	[eval]: disable MCP for SWE-Bench evaluation (#8574 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Engel Nyst <engel.nyst@gmail.com>	2025-05-19 01:32:46 +00:00
Graham Neubig	0b26174d60	Add documentation microagent (#8563 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-18 16:34:15 -04:00
Calvin Smith	b0005d4e09	Limit size of browser events (#8559 ) Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2025-05-18 11:35:09 -06:00
Graham Neubig	2dc7b37fe8	Fix flaky TestLocalFileStore tests (#8569 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-18 06:37:28 -04:00
openhands	2d434ad49f	Rename RuntimeInfo to ContextualInfo	2025-05-17 22:31:31 +00:00
openhands	81c0253d53	Rename memory.set_runtime_info to memory.set_contextual_info	2025-05-17 22:27:48 +00:00
openhands	9cdde313d8	Add context_message parameter to conversation creation endpoint	2025-05-17 22:22:15 +00:00
Carlos Freund	27c18f5bdd	build(makefile) Develop in OpenhandsCloud (#7440 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-05-17 14:40:46 +00:00
Graham Neubig	5077fea5c7	Fix: Run setup.sh script in GitHub resolver (#8548 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-17 09:52:34 -04:00
Justin Coffi	bf383b4881	Add SSH Microagent (#8436 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Xingyao Wang <xingyaoww@gmail.com>	2025-05-17 05:56:51 +00:00
OpenHands	c17b0ebfc6	Fix issue #8304 : [Bug]: Non-native tool use converter fails when builtin tools are disabled (#8310 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Engel Nyst <engel.nyst@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-05-17 06:37:45 +02:00
Xingyao Wang	1f390430e5	feat(MCP, microagent): MCP-support for Repo Microagent & add fetch as default tool (#8360 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-05-16 23:32:38 +00:00
Robert Brennan	819bad0777	Fix: Only show login modal for genuine 401 errors, not connection issues (#8540 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 13:33:06 -04:00
dependabot[bot]	2faed14139	chore(deps): bump the version-all group with 5 updates (#8541 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 17:17:38 +00:00
tofarr	4733270e3c	Add event search endpoints with filtering and pagination (#8538 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 09:51:40 -06:00
Robert Brennan	21dd91de63	Add info logging for 401 Unauthorized responses (#8527 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 11:46:15 -04:00
Graham Neubig	25619c5a93	Fix #8510 : Improve error messages for invalid microagent format (#8511 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 15:01:39 +00:00
Robert Brennan	15f71e7ed6	Update microagent message from "Activated" to "ready" (#8536 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 10:44:37 -04:00
mamoodi	5b583e5f27	Organization and user microagents (#8506 )	2025-05-16 10:42:32 -04:00
Rohit Malhotra	c191a17afb	[Fix]: don't access secrets if doesn't exist (#8535 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 14:30:16 +00:00
Robert Brennan	8ec5d0e043	Add WebSocket connection documentation (#8404 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 10:12:05 -04:00
Robert Brennan	f3f038bb60	Fix mypy error for pkg_resources import (#8537 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 14:06:41 +00:00
Robert Brennan	b8d3027cfe	Always enable logout button regardless of GitHub connection status (#8529 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-16 09:09:33 -04:00
Rohit Malhotra	feb04dc65f	Plumb custom secrets to runtime (#8330 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-15 20:06:30 -04:00
Engel Nyst	1f827170f4	Fix resolver test (#8530 )	2025-05-15 21:57:02 +00:00
Engel Nyst	f7cb2d0f64	Restore previous conversation in CLI (#8431 )	2025-05-15 23:47:41 +02:00
tofarr	033788c2d0	API Updates to facilitate nested runtimes. (#8525 )	2025-05-15 15:38:09 -06:00
dependabot[bot]	21d0990be4	chore(deps): bump the version-all group with 7 updates (#8522 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2025-05-15 20:17:51 +00:00
tofarr	6227073cff	Add missing Ukrainian translations for secrets-related keys (#8526 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-15 19:25:06 +00:00
Graham Neubig	4c38113cb7	Fix CI to check for missing translations (#8486 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-05-15 13:31:44 -04:00
Emmanuel Ferdman	fb516dfa0f	Remove obsolete task.py file (#8517 ) Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>	2025-05-15 18:54:25 +02:00