Add missing imports to ActionExecutionClient

Move all run_action logic to ActionExecutionClient
Refactor runtime action execution
2026-04-29 03:00:45 -04:00 · 2024-12-25 15:54:31 +00:00 · 2024-12-25 15:52:08 +00:00 · 2024-12-25 15:47:02 +00:00 · 2024-12-24 15:28:27 -05:00 · 2024-12-24 18:08:33 +00:00
320 changed files with 7458 additions and 6033 deletions
--- a/.devcontainer/README.MD
+++ b/.devcontainer/README.MD
@@ -1 +0,0 @@
-The files in this directory configure a development container for GitHub Codespaces.
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,15 +0,0 @@
-{
-	"name": "OpenHands Codespaces",
-	"image": "mcr.microsoft.com/devcontainers/universal",
-	"customizations":{
-        "vscode":{
-            "extensions": [
-                "ms-python.python"
-            ]
-        }
-    },
-	"onCreateCommand": "sh ./.devcontainer/on_create.sh",
-	"postCreateCommand": "make build",
-	"postStartCommand": "USE_HOST_NETWORK=True nohup bash -c 'make run &'"
-
-}
--- a/.devcontainer/on_create.sh
+++ b/.devcontainer/on_create.sh
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-sudo apt update
-sudo apt install -y netcat
-sudo add-apt-repository -y ppa:deadsnakes/ppa
-sudo apt install -y python3.12
-curl -sSL https://install.python-poetry.org | python3.12 -
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -18,7 +18,7 @@ updates:
          - "chromadb"
      browsergym:
        patterns:
-          - "browsergym"
+          - "browsergym*"
      security-all:
        applies-to: "security-updates"
        patterns:
--- a/.github/workflows/openhands-resolver.yml
+++ b/.github/workflows/openhands-resolver.yml
@@ -59,7 +59,6 @@ jobs:
      github.event_name == 'workflow_call' ||
      github.event.label.name == 'fix-me' ||
      github.event.label.name == 'fix-me-experimental' ||
-
      (
        ((github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment') &&
        contains(github.event.comment.body, inputs.macro || '@openhands-agent') &&
@@ -117,7 +116,7 @@ jobs:
          PAT_USERNAME: ${{ secrets.PAT_USERNAME }}
          GITHUB_TOKEN: ${{ github.token }}
        run: |
-          required_vars=("LLM_MODEL" "LLM_API_KEY")
+          required_vars=("LLM_API_KEY")
          for var in "${required_vars[@]}"; do
            if [ -z "${!var}" ]; then
              echo "Error: Required environment variable $var is not set."
@@ -126,29 +125,33 @@ jobs:
          done

          # Check optional variables and warn about fallbacks
-          if [ -z "$PAT_TOKEN" ]; then
-            echo "Warning: PAT_TOKEN is not set, falling back to GITHUB_TOKEN"
-          fi
-
          if [ -z "$LLM_BASE_URL" ]; then
            echo "Warning: LLM_BASE_URL is not set, will use default API endpoint"
          fi

+          if [ -z "$PAT_TOKEN" ]; then
+            echo "Warning: PAT_TOKEN is not set, falling back to GITHUB_TOKEN"
+          fi
+
          if [ -z "$PAT_USERNAME" ]; then
            echo "Warning: PAT_USERNAME is not set, will use openhands-agent"
          fi

      - name: Set environment variables
        run: |
-          if [ -n "${{ github.event.review.body }}" ]; then
+          # Handle pull request events first
+          if [ -n "${{ github.event.pull_request.number }}" ]; then
            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle pull request review events
+          elif [ -n "${{ github.event.review.body }}" ]; then
+            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle issue comment events that reference a PR
          elif [ -n "${{ github.event.issue.pull_request }}" ]; then
            echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
-          elif [ -n "${{ github.event.pull_request.number }}" ]; then
-            echo "ISSUE_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
-            echo "ISSUE_TYPE=pr" >> $GITHUB_ENV
+          # Handle regular issue events
          else
            echo "ISSUE_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
            echo "ISSUE_TYPE=issue" >> $GITHUB_ENV
@@ -181,17 +184,32 @@ jobs:
            });

      - name: Install OpenHands
-        run: |
-          if [[ "${{ github.event.label.name }}" == "fix-me-experimental" ]] ||
-             ([[ "${{ github.event_name }}" == "issue_comment" || "${{ github.event_name }}" == "pull_request_review_comment" ]] &&
-              [[ "${{ github.event.comment.body }}" == "@openhands-agent-exp"* ]]) ||
-             ([[ "${{ github.event_name }}" == "pull_request_review" ]] &&
-              [[ "${{ github.event.review.body }}" == "@openhands-agent-exp"* ]]); then
-            python -m pip install --upgrade pip
-            pip install git+https://github.com/all-hands-ai/openhands.git
-          else
-            python -m pip install --upgrade -r requirements.txt
-          fi
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const commentBody = `${{ github.event.comment.body || '' }}`.trim();
+            const reviewBody = `${{ github.event.review.body || '' }}`.trim();
+            const labelName = `${{ github.event.label.name || '' }}`.trim();
+            const eventName = `${{ github.event_name }}`.trim();
+
+            // Check conditions
+            const isExperimentalLabel = labelName === "fix-me-experimental";
+            const isIssueCommentExperimental =
+              (eventName === "issue_comment" || eventName === "pull_request_review_comment") &&
+              commentBody.includes("@openhands-agent-exp");
+            const isReviewCommentExperimental =
+              eventName === "pull_request_review" && reviewBody.includes("@openhands-agent-exp");
+
+            // Perform package installation
+            if (isExperimentalLabel || isIssueCommentExperimental || isReviewCommentExperimental) {
+              console.log("Installing experimental OpenHands...");
+              await exec.exec("python -m pip install --upgrade pip");
+              await exec.exec("pip install git+https://github.com/all-hands-ai/openhands.git");
+            } else {
+              console.log("Installing from requirements.txt...");
+              await exec.exec("python -m pip install --upgrade pip");
+              await exec.exec("pip install -r requirements.txt");
+            }

      - name: Attempt to resolve issue
        env:
@@ -239,7 +257,8 @@ jobs:
          if [ "${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}" == "true" ]; then
            cd /tmp && python -m openhands.resolver.send_pull_request \
              --issue-number ${{ env.ISSUE_NUMBER }} \
-              --pr-type draft | tee pr_result.txt && \
+              --pr-type draft \
+              --reviewer ${{ github.actor }} | tee pr_result.txt && \
              grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
          else
            cd /tmp && python -m openhands.resolver.send_pull_request \
@@ -249,30 +268,58 @@ jobs:
              grep "branch created" branch_result.txt | sed 's/.*\///g; s/.expand=1//g' > branch_name.txt
          fi

-      - name: Comment on issue
+      # Leaves a comment when the agent is invoked on a PR
+      - name: Analyze Push Logs (Updated PR or No Changes) # Skip comment if PR update was successful OR leave comment if the agent made no code changes
        uses: actions/github-script@v7
-        if: always() # Comment on issue even if the previous steps fail
+        if: always()
+        env:
+          AGENT_RESPONDED: ${{ env.AGENT_RESPONDED || 'false' }}
        with:
          github-token: ${{ secrets.PAT_TOKEN || github.token }}
          script: |
            const fs = require('fs');
            const issueNumber = ${{ env.ISSUE_NUMBER }};
+            let logContent = '';
+
+            try {
+              logContent = fs.readFileSync('/tmp/pr_result.txt', 'utf8').trim();
+            } catch (error) {
+              console.error('Error reading pr_result.txt file:', error);
+            }
+
+            const noChangesMessage = `No changes to commit for issue #${issueNumber}. Skipping commit.`;
+
+            // Check logs from send_pull_request.py (pushes code to GitHub)
+            if (logContent.includes("Updated pull request")) {
+              console.log("Updated pull request found. Skipping comment.");
+              process.env.AGENT_RESPONDED = 'true';
+            } else if (logContent.includes(noChangesMessage)) {
+              github.rest.issues.createComment({
+                issue_number: issueNumber,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: `The workflow to fix this issue encountered an error. Openhands failed to create any code changes.`
+              });
+              process.env.AGENT_RESPONDED = 'true';
+            }
+
+      # Leaves a comment when the agent is invoked on an issue
+      - name: Comment on issue # Comment link to either PR or branch created by agent
+        uses: actions/github-script@v7
+        if: always() # Comment on issue even if the previous steps fail
+        env:
+          AGENT_RESPONDED: ${{ env.AGENT_RESPONDED || 'false' }}
+        with:
+          github-token: ${{ secrets.PAT_TOKEN || github.token }}
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+            const issueNumber = ${{ env.ISSUE_NUMBER }};
            const success = ${{ steps.check_result.outputs.RESOLUTION_SUCCESS }};

            let prNumber = '';
            let branchName = '';
-            let logContent = '';
-            const noChangesMessage = `No changes to commit for issue #${issueNumber}. Skipping commit.`;
-
-            try {
-              if (success){
-                logContent = fs.readFileSync('/tmp/pr_result.txt', 'utf8').trim();
-              } else {
-                logContent = fs.readFileSync('/tmp/branch_result.txt', 'utf8').trim();
-              }
-            } catch (error) {
-              console.error('Error reading results file:', error);
-            }
+            let resultExplanation = '';

            try {
              if (success) {
@@ -284,32 +331,63 @@ jobs:
              console.error('Error reading file:', error);
            }

-            if (logContent.includes(noChangesMessage)) {
-              github.rest.issues.createComment({
-                issue_number: issueNumber,
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                body: `The workflow to fix this issue encountered an error. Openhands failed to create any code changes.`
-              });
-            } else if (success && prNumber) {
+
+            try {
+              if (!success){
+                // Read result_explanation from JSON file for failed resolution
+                const outputFilePath = path.resolve('/tmp/output/output.jsonl');
+                if (fs.existsSync(outputFilePath)) {
+                  const outputContent = fs.readFileSync(outputFilePath, 'utf8');
+                  const jsonLines = outputContent.split('\n').filter(line => line.trim() !== '');
+
+                  if (jsonLines.length > 0) {
+                    // First entry in JSON lines has the key 'result_explanation'
+                    const firstEntry = JSON.parse(jsonLines[0]);
+                    resultExplanation = firstEntry.result_explanation || '';
+                  }
+                }
+              }
+            } catch (error){
+              console.error('Error reading file:', error);
+            }
+
+            // Check "success" log from resolver output
+            if (success && prNumber) {
              github.rest.issues.createComment({
                issue_number: issueNumber,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: `A potential fix has been generated and a draft PR #${prNumber} has been created. Please review the changes.`
              });
+              process.env.AGENT_RESPONDED = 'true';
            } else if (!success && branchName) {
+              let commentBody = `An attempt was made to automatically fix this issue, but it was unsuccessful. A branch named '${branchName}' has been created with the attempted changes. You can view the branch [here](https://github.com/${context.repo.owner}/${context.repo.repo}/tree/${branchName}). Manual intervention may be required.`;
+
+              if (resultExplanation) {
+                commentBody += `\n\nAdditional details about the failure:\n${resultExplanation}`;
+              }
+
              github.rest.issues.createComment({
                issue_number: issueNumber,
                owner: context.repo.owner,
                repo: context.repo.repo,
-                body: `An attempt was made to automatically fix this issue, but it was unsuccessful. A branch named '${branchName}' has been created with the attempted changes. You can view the branch [here](https://github.com/${context.repo.owner}/${context.repo.repo}/tree/${branchName}). Manual intervention may be required.`
-              });
-            } else {
-              github.rest.issues.createComment({
-                issue_number: issueNumber,
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                body: `The workflow to fix this issue encountered an error. Please check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for more information.`
+                body: commentBody
              });
+              process.env.AGENT_RESPONDED = 'true';
            }
+
+      # Leave an error comment when neither the PR step nor the issue step above responded
+      - name: Fallback Error Comment
+        uses: actions/github-script@v7
+        if: ${{ env.AGENT_RESPONDED == 'false' }} # Only run if no conditions were met in previous steps
+        with:
+          github-token: ${{ secrets.PAT_TOKEN || github.token }}
+          script: |
+            const issueNumber = ${{ env.ISSUE_NUMBER }};
+
+            github.rest.issues.createComment({
+              issue_number: issueNumber,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: `The workflow to fix this issue encountered an error. Please check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for more information.`
+            });
--- a/.github/workflows/py-unit-tests.yml
+++ b/.github/workflows/py-unit-tests.yml
@@ -42,7 +42,7 @@ jobs:
      - name: Build Environment
        run: make build
      - name: Run Tests
-        run: poetry run pytest --forked --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
+        run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        env:
--- a/.nvmrc
+++ b/.nvmrc
@@ -0,0 +1 @@
+22
--- a/Development.md
+++ b/Development.md
@@ -100,7 +100,7 @@ poetry run pytest ./tests/unit/test_*.py
 To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
 setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.

-Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.15-nikolaik`
+Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.16-nikolaik`

 ## Develop inside Docker container

--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@
  <a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
  <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
  <br/>
-  <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
+  <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
  <a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
  <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits"></a>
  <br/>
@@ -29,6 +29,11 @@ call APIs, and yes—even copy code snippets from StackOverflow.

 Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start).

+> [!IMPORTANT]
+> Using OpenHands for work? We'd love to chat! Fill out
+> [this short form](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
+> to join our Design Partner program, where you'll get early access to commercial features and the opportunity to provide input on our product roadmap.
+
 ![App screenshot](./docs/static/img/screenshot.png)

 ## ⚡ Quick Start
@@ -38,16 +43,17 @@ See the [Installation](https://docs.all-hands.dev/modules/usage/installation) gu
 system requirements and more information.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik

-docker run -it --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
+docker run -it --rm --pull=always \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
+    -v ~/.openhands:/home/openhands/.openhands \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.15
+    docker.all-hands.dev/all-hands-ai/openhands:0.16
 ```

 You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
@@ -65,6 +71,14 @@ or run it on tagged issues with [a github action](https://github.com/All-Hands-A

 Visit [Installation](https://docs.all-hands.dev/modules/usage/installation) for more information and setup instructions.

+> [!CAUTION]
+> OpenHands is meant to be run by a single user on their local workstation.
+> It is not appropriate for multi-tenant deployments, where multiple users share the same instance—there is no built-in isolation or scalability.
+>
+> If you're interested in running OpenHands in a multi-tenant environment, please
+> [get in touch with us](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
+> for advanced deployment options.
+
 If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).

 Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help.
@@ -82,7 +96,7 @@ troubleshooting resources, and advanced configuration options.
 OpenHands is a community-driven project, and we welcome contributions from everyone. We do most of our communication
 through Slack, so this is the best place to start, but we also are happy to have you contact us on Discord or Github:

- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg) - Here we talk about research, architecture, and future development.
+- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw) - Here we talk about research, architecture, and future development.
 - [Join our Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.
 - [Read or post Github Issues](https://github.com/All-Hands-AI/OpenHands/issues) - Check out the issues we're working on, or add your own ideas.

--- a/compose.yml
+++ b/compose.yml
@@ -7,7 +7,7 @@ services:
    image: openhands:latest
    container_name: openhands-app-${DATE:-}
    environment:
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.15-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.16-nikolaik}
      - SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
--- a/config.template.toml
+++ b/config.template.toml
@@ -154,6 +154,10 @@ model = "gpt-4o"
 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false

+# Allow litellm to modify params, e.g. to add a default message when a message is empty.
+# Note: this setting is global; unlike drop_params, it cannot be overridden in each call to litellm.
+#modify_params = true
+
 # Using the prompt caching feature if provided by the LLM and supported
 #caching_prompt = true

@@ -172,6 +176,10 @@ model = "gpt-4o"
 # If model is vision capable, this option allows to disable image processing (useful for cost reduction).
 #disable_vision = true

+# Custom tokenizer to use for token counting
+# https://docs.litellm.ai/docs/completion/token_usage
+#custom_tokenizer = ""
+
 [llm.gpt4o-mini]
 api_key = "your-api-key"
 model = "gpt-4o"
@@ -217,6 +225,9 @@ llm_config = 'gpt3'
 # Use host network
 #use_host_network = false

+# Extra build args for the runtime
+#runtime_extra_build_args = ["--network=host", "--add-host=host.docker.internal:host-gateway"]
+
 # Enable auto linting after editing
 #enable_auto_lint = false

--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -42,6 +42,8 @@ ENV USE_HOST_NETWORK=false
 ENV WORKSPACE_BASE=/opt/workspace_base
 ENV OPENHANDS_BUILD_VERSION=$OPENHANDS_BUILD_VERSION
 ENV SANDBOX_USER_ID=0
+ENV FILE_STORE=local
+ENV FILE_STORE_PATH=~/.openhands
 RUN mkdir -p $WORKSPACE_BASE

 RUN apt-get update -y \
--- a/containers/dev/README.md
+++ b/containers/dev/README.md
@@ -1,5 +1,8 @@
 # Develop in Docker

+> [!WARNING]
+> This is not officially supported and may not work.
+
 Install [Docker](https://docs.docker.com/engine/install/) on your host machine and run:

 ```bash
--- a/containers/dev/compose.yml
+++ b/containers/dev/compose.yml
@@ -11,7 +11,7 @@ services:
      - BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
      - SANDBOX_API_HOSTNAME=host.docker.internal
      #
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.15-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.16-nikolaik}
      - SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/about.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/about.md
@@ -27,7 +27,7 @@ Pour plus de détails, veuillez consulter [ce document](https://github.com/All-H

 Nous avons à la fois un espace de travail Slack pour la collaboration sur la construction d'OpenHands et un serveur Discord pour discuter de tout ce qui est lié, par exemple, à ce projet, LLM, agent, etc.

- [Espace de travail Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg)
+- [Espace de travail Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw)
 - [Serveur Discord](https://discord.gg/ESHStjSjD4)

 Si vous souhaitez contribuer, n'hésitez pas à rejoindre notre communauté. Simplifions ensemble l'ingénierie logicielle !
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -98,4 +98,4 @@ Si vous voyez un message d'erreur indiquant que le port est utilisé ou indispon

 ## Discuter

-Pour d'autres problèmes ou questions rejoignez le [Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA) ou le [Discord](https://discord.gg/ESHStjSjD4) et demandez!
+Pour d'autres problèmes ou questions rejoignez le [Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw) ou le [Discord](https://discord.gg/ESHStjSjD4) et demandez!
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/custom-sandbox-guide.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/custom-sandbox-guide.md
@@ -80,4 +80,4 @@ Si vous voyez une erreur concernant un port déjà utilisé ou indisponible, ess

 ## Discuter

-Pour d'autres problèmes ou questions, rejoignez le [Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA) ou le [Discord](https://discord.gg/ESHStjSjD4) et demandez !
+Pour d'autres problèmes ou questions, rejoignez le [Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw) ou le [Discord](https://discord.gg/ESHStjSjD4) et demandez !
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/openshift-example.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/openshift-example.md
@@ -1,338 +0,0 @@
-
-
-# Kubernetes
-
-Il existe différentes façons d'exécuter OpenHands sur Kubernetes ou OpenShift. Ce guide présente une façon possible :
-1. Créer un PV "en tant qu'administrateur du cluster" pour mapper les données workspace_base et le répertoire docker au pod via le nœud worker
-2. Créer un PVC pour pouvoir monter ces PV sur le pod
-3. Créer un pod qui contient deux conteneurs : les conteneurs OpenHands et Sandbox
-
-## Étapes détaillées pour l'exemple ci-dessus
-
-> Remarque : Assurez-vous d'être connecté au cluster avec le compte approprié pour chaque étape. La création de PV nécessite un administrateur de cluster !
-
-> Assurez-vous d'avoir les autorisations de lecture/écriture sur le hostPath utilisé ci-dessous (c'est-à-dire /tmp/workspace)
-
-1. Créer le PV :
-Le fichier yaml d'exemple ci-dessous peut être utilisé par un administrateur de cluster pour créer le PV.
- workspace-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: workspace-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /tmp/workspace
-```
-
-```bash
-# appliquer le fichier yaml
-$ oc create -f workspace-pv.yaml
-persistentvolume/workspace-pv created
-
-# vérifier :
-$ oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
- docker-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: docker-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /var/run/docker.sock
-```
-
-```bash
-# appliquer le fichier yaml
-$ oc create -f docker-pv.yaml
-persistentvolume/docker-pv created
-
-# vérifier :
-oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-docker-pv                                  2Gi        RWO            Retain           Available                                                  6m55s
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
-2. Créer le PVC :
-Exemple de fichier yaml PVC ci-dessous :
-
- workspace-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: workspace-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# créer le pvc
-$ oc create -f workspace-pvc.yaml
-persistentvolumeclaim/workspace-pvc created
-
-# vérifier
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-workspace-pvc   Pending                                      hcloud-volumes   4s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-8s          Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
- docker-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: docker-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# créer le pvc
-$ oc create -f docker-pvc.yaml
-persistentvolumeclaim/docker-pvc created
-
-# vérifier
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Pending                                      hcloud-volumes   4s
-workspace-pvc   Pending                                      hcloud-volumes   2m53s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
-3. Créer le fichier yaml du pod :
-Exemple de fichier yaml de pod ci-dessous :
-
- pod.yaml
-
-```yamlfile
-apiVersion: v1
-kind: Pod
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  containers:
-  - name: openhands-app-2024
-    image: ghcr.io/all-hands-ai/openhands:main
-    env:
-    - name: SANDBOX_USER_ID
-      value: "1000"
-    - name: WORKSPACE_MOUNT_PATH
-      value: "/opt/workspace_base"
-    volumeMounts:
-    - name: workspace-volume
-      mountPath: /opt/workspace_base
-    - name: docker-sock
-      mountPath: /var/run/docker.sock
-    ports:
-    - containerPort: 3000
-  - name: openhands-sandbox-2024
-    image: ghcr.io/all-hands-ai/sandbox:main
-    ports:
-    - containerPort: 51963
-    command: ["/usr/sbin/sshd", "-D", "-p 51963", "-o", "PermitRootLogin=yes"]
-  volumes:
-  - name: workspace-volume
-    persistentVolumeClaim:
-      claimName: workspace-pvc
-  - name: docker-sock
-    persistentVolumeClaim:
-      claimName: docker-pvc
-```
-
-
-```bash
-# créer le pod
-$ oc create -f pod.yaml
-W0716 11:22:07.776271  107626 warnings.go:70] would violate PodSecurity "restricted:v1.24": allowPrivilegeEscalation != false (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.runAsNonRoot=true), seccompProfile (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
-pod/openhands-app-2024 created
-
-# L'avertissement ci-dessus peut être ignoré pour l'instant car nous ne modifierons pas les restrictions SCC.
-
-# vérifier
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   0/2     Pending   0          5s
-
-$ oc get pods
-NAME                 READY   STATUS              RESTARTS   AGE
-openhands-app-2024   0/2     ContainerCreating   0          15s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                   OBJECT                                MESSAGE
-38s         Normal   WaitForFirstConsumer     persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-23s         Normal   ExternalProvisioning     persistentvolumeclaim/docker-pvc      waiting for a volume to be created, either by external provisioner "csi.hetzner.cloud" or manually created by system administrator
-27s         Normal   Provisioning             persistentvolumeclaim/docker-pvc      External provisioner is provisioning volume for claim "openhands/docker-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/docker-pvc      Successfully provisioned volume pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252
-16s         Normal   Scheduled                pod/openhands-app-2024                Successfully assigned All-Hands-AI/OpenHands-app-2024 to worker1.hub.internal.blakane.com
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252"
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-31f15b25-faad-4665-a25f-201a530379af"
-6s          Normal   AddedInterface           pod/openhands-app-2024                Add eth0 [10.128.2.48/23] from openshift-sdn
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "ghcr.io/all-hands-ai/openhands:main" already present on machine
-6s          Normal   Created                  pod/openhands-app-2024                Created container openhands-app-2024
-6s          Normal   Started                  pod/openhands-app-2024                Started container openhands-app-2024
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "ghcr.io/all-hands-ai/sandbox:main" already present on machine
-5s          Normal   Created                  pod/openhands-app-2024                Created container openhands-sandbox-2024
-5s          Normal   Started                  pod/openhands-app-2024                Started container openhands-sandbox-2024
-83s         Normal   WaitForFirstConsumer     persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-27s         Normal   Provisioning             persistentvolumeclaim/workspace-pvc   External provisioner is provisioning volume for claim "openhands/workspace-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/workspace-pvc   Successfully provisioned volume pvc-31f15b25-faad-4665-a25f-201a530379af
-
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   2/2     Running   0          23s
-
-$ oc get pvc
-NAME            STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Bound    pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252   10Gi       RWO            hcloud-volumes   10m
-workspace-pvc   Bound    pvc-31f15b25-faad-4665-a25f-201a530379af   10Gi       RWO            hcloud-volumes   13m
-
-```
-
-4. Créer un service NodePort.
-Exemple de commande de création de service ci-dessous :
-
-```bash
-# créer le service de type NodePort
-$ oc create svc nodeport  openhands-app-2024  --tcp=3000:3000
-service/openhands-app-2024 created
-
-# vérifier
-
-$ oc get svc
-NAME                 TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
-openhands-app-2024   NodePort   172.30.225.42   <none>        3000:30495/TCP   4s
-
-$ oc describe svc openhands-app-2024
-Name:                     openhands-app-2024
-Namespace:                openhands
-Labels:                   app=openhands-app-2024
-Annotations:              <none>
-Selector:                 app=openhands-app-2024
-Type:                     NodePort
-IP Family Policy:         SingleStack
-IP Families:              IPv4
-IP:                       172.30.225.42
-IPs:                      172.30.225.42
-Port:                     3000-3000  3000/TCP
-TargetPort:               3000/TCP
-NodePort:                 3000-3000  30495/TCP
-Endpoints:                10.128.2.48:3000
-Session Affinity:         None
-External Traffic Policy:  Cluster
-Events:                   <none>
-```
-
-6. Se connecter à l'interface utilisateur d'OpenHands, configurer l'Agent, puis tester :
-
-![image](https://github.com/user-attachments/assets/12f94804-a0c7-4744-b873-e003c9caf40e)
-
-
-
-## Déploiement d'Openhands sur GCP GKE
-
-**Avertissement** : ce déploiement accorde à l'application OpenHands l'accès au socket docker de Kubernetes, ce qui crée un risque de sécurité. Utilisez à vos propres risques.
-1- Créer une politique pour l'accès privilégié
-2- Créer des informations d'identification gke (facultatif)
-3- Créer le déploiement openhands
-4- Commandes de vérification et d'accès à l'interface utilisateur
-5- Dépanner le pod pour vérifier le conteneur interne
-
-1. créer une politique pour l'accès privilégié
-```bash
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: privileged-role
-rules:
- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: ["apps"]
-  resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: [""]
-  resources: ["pods/exec"]
-  verbs: ["create"]
- apiGroups: [""]
-  resources: ["pods/log"]
-  verbs: ["get"]
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: privileged-role-binding
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: privileged-role
-subjects:
- kind: ServiceAccount
-  name: default  # Remplacez par le nom de votre compte de service
-  namespace: default
-```
-2. créer des informations d'identification gke (facultatif)
-```bash
-kubectl create secret generic google-cloud-key \
-  --from-file=key.json=/path/to/your/google-cloud-key.json
-  ```
-3. créer le déploiement openhands
-## comme cela est testé pour le nœud worker unique, si vous en avez plusieurs, spécifiez l'indicateur pour le worker unique
-
-```bash
-kind: Deployment
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  replicas: 1  # Vous pouvez augmenter ce nombre pour plusieurs réplicas
-  selector:
-    matchLabels:
-      app: openhands-app-2024
-  template:
-    metadata:
-      labels:
-        app: openhands-app-2024
-    spec:
-      containers:
-      -
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/intro.mdx
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/intro.mdx
@@ -42,7 +42,7 @@ Explorez le code source d'OpenHands sur [GitHub](https://github.com/All-Hands-AI
  />
 </a>
 <br></br>
-<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA">
+<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw">
  <img
    src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
    alt="Join our Slack community"
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
@@ -9,7 +9,6 @@ Si vous trouvez plus d'informations ou une solution de contournement pour l'un d
 :::tip
 OpenHands ne prend en charge Windows que via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
 Veuillez vous assurer d'exécuter toutes les commandes à l'intérieur de votre terminal WSL.
-Consultez les [Notes pour les utilisateurs de WSL sur Windows](troubleshooting/windows) pour des guides de dépannage.
 :::

 ## Problèmes courants
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
@@ -1,66 +0,0 @@
-
-
-# Notes pour les utilisateurs de WSL sur Windows
-
-OpenHands ne prend en charge Windows que via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Veuillez vous assurer d'exécuter toutes les commandes dans votre terminal WSL.
-
-## Dépannage
-
-### Recommandation : Ne pas exécuter en tant qu'utilisateur root
-
-Pour des raisons de sécurité, il est fortement recommandé de ne pas exécuter OpenHands en tant qu'utilisateur root, mais en tant qu'utilisateur avec un UID non nul.
-
-Références :
-
-* [Pourquoi il est mauvais de se connecter en tant que root](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [Définir l'utilisateur par défaut dans WSL](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-Astuce concernant la 2ème référence : pour les utilisateurs d'Ubuntu, la commande pourrait en fait être "ubuntupreview" au lieu de "ubuntu".
-
---
-### Erreur : 'docker' n'a pas pu être trouvé dans cette distribution WSL 2.
-
-Si vous utilisez Docker Desktop, assurez-vous de le démarrer avant d'appeler toute commande docker depuis WSL.
-Docker doit également avoir l'option d'intégration WSL activée.
-
---
-### Installation de Poetry
-
-* Si vous rencontrez des problèmes pour exécuter Poetry même après l'avoir installé pendant le processus de build, vous devrez peut-être ajouter son chemin binaire à votre environnement :
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* Si make build s'arrête sur une erreur comme celle-ci :
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-Cela pourrait être un problème avec le cache de Poetry.
-Essayez d'exécuter ces 2 commandes l'une après l'autre :
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
---
-### L'objet NoneType n'a pas d'attribut 'request'
-
-Si vous rencontrez des problèmes liés au réseau, tels que `NoneType object has no attribute 'request'` lors de l'exécution de `make run`, vous devrez peut-être configurer les paramètres réseau de WSL2. Suivez ces étapes :
-
-* Ouvrez ou créez le fichier `.wslconfig` situé à `C:\Users\%username%\.wslconfig` sur votre machine hôte Windows.
-* Ajoutez la configuration suivante au fichier `.wslconfig` :
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* Enregistrez le fichier `.wslconfig`.
-* Redémarrez complètement WSL2 en quittant toutes les instances WSL2 en cours d'exécution et en exécutant la commande `wsl --shutdown` dans votre invite de commande ou terminal.
-* Après avoir redémarré WSL, essayez d'exécuter à nouveau `make run`.
-Le problème de réseau devrait être résolu.
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/about.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/about.md
@@ -27,7 +27,7 @@ OpenHands 是一个社区驱动的项目，我们欢迎每个人的贡献。无

 我们有 Slack 工作区用于协作构建 OpenHands，也有 Discord 服务器用于讨论任何相关的内容，例如此项目、大语言模型、代理等。

- [Slack 工作区](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg)
+- [Slack 工作区](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw)
 - [Discord 服务器](https://discord.gg/ESHStjSjD4)

 如果你想做出贡献，欢迎加入我们的社区。让我们一起简化软件工程！
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -99,4 +99,4 @@ sandbox_user_id="1001"

 ## 讨论

-对于其他问题或疑问，请加入 [Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA) 或 [Discord](https://discord.gg/ESHStjSjD4) 提问！
+对于其他问题或疑问，请加入 [Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw) 或 [Discord](https://discord.gg/ESHStjSjD4) 提问！
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/custom-sandbox-guide.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/custom-sandbox-guide.md
@@ -78,4 +78,4 @@ sandbox_user_id="1001"

 ## 讨论

-对于其他问题或疑问，请加入 [Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA) 或 [Discord](https://discord.gg/ESHStjSjD4) 并提问！
+对于其他问题或疑问，请加入 [Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw) 或 [Discord](https://discord.gg/ESHStjSjD4) 并提问！
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/openshift-example.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/openshift-example.md
@@ -1,343 +0,0 @@
-以下是翻译后的内容:
-
-# Kubernetes
-
-在 Kubernetes 或 OpenShift 上运行 OpenHands 有不同的方式。本指南介绍了一种可能的方式:
-1. 作为集群管理员,创建一个 PV 将 workspace_base 数据和 docker 目录映射到 worker 节点上的 pod
-2. 创建一个 PVC 以便将这些 PV 挂载到 pod
-3. 创建一个包含两个容器的 pod:OpenHands 和 Sandbox 容器
-
-## 上述示例的详细步骤
-
-> 注意:确保首先使用适当的帐户登录到集群以执行每个步骤。创建 PV 需要集群管理员权限!
-
-> 确保你对下面使用的 hostPath(即 /tmp/workspace)有读写权限
-
-1. 创建 PV:
-集群管理员可以使用下面的示例 yaml 文件创建 PV。
- workspace-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: workspace-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /tmp/workspace
-```
-
-```bash
-# 应用 yaml 文件
-$ oc create -f workspace-pv.yaml
-persistentvolume/workspace-pv created
-
-# 查看:
-$ oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
- docker-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: docker-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /var/run/docker.sock
-```
-
-```bash
-# 应用 yaml 文件
-$ oc create -f docker-pv.yaml
-persistentvolume/docker-pv created
-
-# 查看:
-oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-docker-pv                                  2Gi        RWO            Retain           Available                                                  6m55s
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
-2. 创建 PVC:
-下面是示例 PVC yaml 文件:
-
- workspace-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: workspace-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# 创建 pvc
-$ oc create -f workspace-pvc.yaml
-persistentvolumeclaim/workspace-pvc created
-
-# 查看
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-workspace-pvc   Pending                                      hcloud-volumes   4s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-8s          Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
- docker-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: docker-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# 创建 pvc
-$ oc create -f docker-pvc.yaml
-persistentvolumeclaim/docker-pvc created
-
-# 查看
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Pending                                      hcloud-volumes   4s
-workspace-pvc   Pending                                      hcloud-volumes   2m53s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
-3. 创建 pod yaml 文件:
-下面是示例 pod yaml 文件:
-
- pod.yaml
-
-```yamlfile
-apiVersion: v1
-kind: Pod
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  containers:
-  - name: openhands-app-2024
-    image: ghcr.io/all-hands-ai/openhands:main
-    env:
-    - name: SANDBOX_USER_ID
-      value: "1000"
-    - name: WORKSPACE_MOUNT_PATH
-      value: "/opt/workspace_base"
-    volumeMounts:
-    - name: workspace-volume
-      mountPath: /opt/workspace_base
-    - name: docker-sock
-      mountPath: /var/run/docker.sock
-    ports:
-    - containerPort: 3000
-  - name: openhands-sandbox-2024
-    image: ghcr.io/all-hands-ai/sandbox:main
-    ports:
-    - containerPort: 51963
-    command: ["/usr/sbin/sshd", "-D", "-p 51963", "-o", "PermitRootLogin=yes"]
-  volumes:
-  - name: workspace-volume
-    persistentVolumeClaim:
-      claimName: workspace-pvc
-  - name: docker-sock
-    persistentVolumeClaim:
-      claimName: docker-pvc
-```
-
-
-```bash
-# 创建 pod
-$ oc create -f pod.yaml
-W0716 11:22:07.776271  107626 warnings.go:70] would violate PodSecurity "restricted:v1.24": allowPrivilegeEscalation != false (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.runAsNonRoot=true), seccompProfile (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
-pod/openhands-app-2024 created
-
-# 上面的警告可以暂时忽略,因为我们不会修改 SCC 限制。
-
-# 查看
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   0/2     Pending   0          5s
-
-$ oc get pods
-NAME                 READY   STATUS              RESTARTS   AGE
-openhands-app-2024   0/2     ContainerCreating   0          15s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                   OBJECT                                MESSAGE
-38s         Normal   WaitForFirstConsumer     persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-23s         Normal   ExternalProvisioning     persistentvolumeclaim/docker-pvc      waiting for a volume to be created, either by external provisioner "csi.hetzner.cloud" or manually created by system administrator
-27s         Normal   Provisioning             persistentvolumeclaim/docker-pvc      External provisioner is provisioning volume for claim "openhands/docker-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/docker-pvc      Successfully provisioned volume pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252
-16s         Normal   Scheduled                pod/openhands-app-2024                Successfully assigned All-Hands-AI/OpenHands-app-2024 to worker1.hub.internal.blakane.com
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252"
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-31f15b25-faad-4665-a25f-201a530379af"
-6s          Normal   AddedInterface           pod/openhands-app-2024                Add eth0 [10.128.2.48/23] from openshift-sdn
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "ghcr.io/all-hands-ai/openhands:main" already present on machine
-6s          Normal   Created                  pod/openhands-app-2024                Created container openhands-app-2024
-6s          Normal   Started                  pod/openhands-app-2024                Started container openhands-app-2024
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "ghcr.io/all-hands-ai/sandbox:main" already present on machine
-5s          Normal   Created                  pod/openhands-app-2024                Created container openhands-sandbox-2024
-5s          Normal   Started                  pod/openhands-app-2024                Started container openhands-sandbox-2024
-83s         Normal   WaitForFirstConsumer     persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-27s         Normal   Provisioning             persistentvolumeclaim/workspace-pvc   External provisioner is provisioning volume for claim "openhands/workspace-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/workspace-pvc   Successfully provisioned volume pvc-31f15b25-faad-4665-a25f-201a530379af
-
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   2/2     Running   0          23s
-
-$ oc get pvc
-NAME            STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Bound    pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252   10Gi       RWO            hcloud-volumes   10m
-workspace-pvc   Bound    pvc-31f15b25-faad-4665-a25f-201a530379af   10Gi       RWO            hcloud-volumes   13m
-
-```
-
-4. 创建一个 NodePort 服务。
-下面是示例服务创建命令:
-
-```bash
-# 创建 NodePort 类型的服务
-$ oc create svc nodeport  openhands-app-2024  --tcp=3000:3000
-service/openhands-app-2024 created
-
-# 查看
-
-$ oc get svc
-NAME                 TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
-openhands-app-2024   NodePort   172.30.225.42   <none>        3000:30495/TCP   4s
-
-$ oc describe svc openhands-app-2024
-Name:                     openhands-app-2024
-Namespace:                openhands
-Labels:                   app=openhands-app-2024
-Annotations:              <none>
-Selector:                 app=openhands-app-2024
-Type:                     NodePort
-IP Family Policy:         SingleStack
-IP Families:              IPv4
-IP:                       172.30.225.42
-IPs:                      172.30.225.42
-Port:                     3000-3000  3000/TCP
-TargetPort:               3000/TCP
-NodePort:                 3000-3000  30495/TCP
-Endpoints:                10.128.2.48:3000
-Session Affinity:         None
-External Traffic Policy:  Cluster
-Events:                   <none>
-```
-
-6. 连接到 OpenHands UI,配置 Agent,然后测试:
-
-![image](https://github.com/user-attachments/assets/12f94804-a0c7-4744-b873-e003c9caf40e)
-
-
-
-## GCP GKE OpenHands 部署
-
-**警告**:此部署授予 OpenHands 应用程序访问 Kubernetes docker socket 的权限,这会带来安全风险。请自行决定是否使用。
-1- 创建特权访问策略
-2- 创建 gke 凭证(可选)
-3- 创建 openhands 部署
-4- 验证和 UI 访问命令
-5- 排查 pod 以验证内部容器
-
-1. 创建特权访问策略
-```bash
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: privileged-role
-rules:
- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: ["apps"]
-  resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: [""]
-  resources: ["pods/exec"]
-  verbs: ["create"]
- apiGroups: [""]
-  resources: ["pods/log"]
-  verbs: ["get"]
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: privileged-role-binding
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: privileged-role
-subjects:
- kind: ServiceAccount
-  name: default  # 更改为你的服务帐户名称
-  namespace: default
-```
-2. 创建 gke 凭证(可选)
-```bash
-kubectl create secret generic google-cloud-key \
-  --from-file=key.json=/path/to/your/google-cloud-key.json
-  ```
-3. 创建 openhands 部署
-## 由于这是针对单个工作节点进行测试的,如果你有多个节点,请指定单个工作节点的标志
-
-```bash
-kind: Deployment
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  replicas: 1  # 你可以增加这个数字以获得多个副本
-  selector:
-    matchLabels:
-      app: openhands-app-2024
-  template:
-    metadata:
-      labels:
-        app: openhands-app-2024
-    spec:
-      containers:
-      - name: openhands-app-2024
-        image: ghcr.io/all-hands-ai/openhands:main
-        env:
-        - name: SANDBOX_USER_ID
-          value: "1000"
-        - name: SANDBOX_API
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/intro.mdx
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/intro.mdx
@@ -42,7 +42,7 @@ OpenHands 是一个**自主 AI 软件工程师**，能够执行复杂的工程
  />
 </a>
 <br></br>
-<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA">
+<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw">
  <img
    src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
    alt="Join our Slack community"
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/troubleshooting.md
@@ -7,7 +7,6 @@
 :::tip
 OpenHands 仅通过 [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 支持 Windows。
 请确保在您的 WSL 终端内运行所有命令。
-查看 [Windows 用户的 WSL 注意事项](troubleshooting/windows) 以获取一些故障排除指南。
 :::

 ## 常见问题
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/troubleshooting/windows.md
@@ -1,66 +0,0 @@
-以下是翻译后的内容:
-
-# 针对 Windows 上 WSL 用户的注意事项
-
-OpenHands 仅通过 [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) 支持 Windows。
-请确保在您的 WSL 终端内运行所有命令。
-
-## 故障排除
-
-### 建议: 不要以 root 用户身份运行
-
-出于安全原因,强烈建议不要以 root 用户身份运行 OpenHands,而是以具有非零 UID 的用户身份运行。
-
-参考:
-
-* [为什么以 root 身份登录不好](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [在 WSL 中设置默认用户](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-关于第二个参考的提示:对于 Ubuntu 用户,命令实际上可能是 "ubuntupreview" 而不是 "ubuntu"。
-
---
-### 错误: 在此 WSL 2 发行版中找不到 'docker'。
-
-如果您正在使用 Docker Desktop,请确保在从 WSL 内部调用任何 docker 命令之前启动它。
-Docker 还需要激活 WSL 集成选项。
-
---
-### Poetry 安装
-
-* 如果您在构建过程中安装 Poetry 后仍然面临运行 Poetry 的问题,您可能需要将其二进制路径添加到环境中:
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* 如果 make build 在如下错误上停止:
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-这可能是 Poetry 缓存的问题。
-尝试依次运行这两个命令:
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
---
-### NoneType 对象没有属性 'request'
-
-如果您在执行 `make run` 时遇到与网络相关的问题,例如 `NoneType 对象没有属性 'request'`,您可能需要配置 WSL2 网络设置。请按照以下步骤操作:
-
-* 在 Windows 主机上打开或创建位于 `C:\Users\%username%\.wslconfig` 的 `.wslconfig` 文件。
-* 将以下配置添加到 `.wslconfig` 文件中:
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* 保存 `.wslconfig` 文件。
-* 通过退出任何正在运行的 WSL2 实例并在命令提示符或终端中执行 `wsl --shutdown` 命令来完全重启 WSL2。
-* 重新启动 WSL 后,再次尝试执行 `make run`。
-网络问题应该得到解决。
--- a/docs/modules/usage/how-to/cli-mode.md
+++ b/docs/modules/usage/how-to/cli-mode.md
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.15 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.16 \
    python -m openhands.core.cli
 ```

--- a/docs/modules/usage/how-to/github-action.md
+++ b/docs/modules/usage/how-to/github-action.md
@@ -39,23 +39,28 @@ You can provide custom directions for OpenHands by following the [README for the

 ### Custom configurations

-Github resolver will automatically check for valid [repository secrets](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions?tool=webui#creating-secrets-for-a-repository) or [repository variables](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) to customize its behavior. The customization options you can set are:
+The GitHub resolver automatically checks for valid [repository secrets](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions?tool=webui#creating-secrets-for-a-repository) or [repository variables](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) to customize its behavior.
+The customization options you can set are:

-| **Attribute name**               | **Type** | **Purpose**                                                                                         | **Example**                                     |
-| -------------------------------- | -------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------- |
-| `OPENHANDS_MAX_ITER`             | Variable | Set max limit for agent iterations                                                                  | `OPENHANDS_MAX_ITER=10`                         |
-| `OPENHANDS_MACRO`                | Variable | Customize default macro for invoking the resolver                                                   | `OPENHANDS_MACRO=@resolveit`                    |
-| `OPENHANDS_BASE_CONTAINER_IMAGE` | Variable | Custom Sandbox ([learn more](https://docs.all-hands.dev/modules/usage/how-to/custom-sandbox-guide)) | `OPENHANDS_BASE_CONTAINER_IMAGE="custom_image"` |
+| **Attribute name**               | **Type** | **Purpose**                                                                                                 | **Example**                                          |
+|----------------------------------| -------- |-------------------------------------------------------------------------------------------------------------|------------------------------------------------------|
+| `LLM_MODEL`                      | Variable | Set the LLM to use with OpenHands                                                                           | `LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"`   |
+| `OPENHANDS_MAX_ITER`             | Variable | Set max limit for agent iterations                                                                          | `OPENHANDS_MAX_ITER=10`                              |
+| `OPENHANDS_MACRO`                | Variable | Customize default macro for invoking the resolver                                                           | `OPENHANDS_MACRO=@resolveit`                         |
+| `OPENHANDS_BASE_CONTAINER_IMAGE` | Variable | Custom Sandbox ([learn more](https://docs.all-hands.dev/modules/usage/how-to/custom-sandbox-guide))         | `OPENHANDS_BASE_CONTAINER_IMAGE="custom_image"`      |

 ## Writing Effective .openhands_instructions Files

-The `.openhands_instructions` file is a file that you can put in the root directory of your repository to guide OpenHands in understanding and working with your repository effectively. Here are key tips for writing high-quality instructions:
+You can place a `.openhands_instructions` file in the root directory of your repository to guide OpenHands
+in understanding and working with your repository effectively. Here are key tips for writing high-quality instructions:

 ### Core Principles

-1. **Concise but Informative**: Provide a clear, focused overview of the repository that emphasizes the most common actions OpenHands will need to perform.
+1. **Concise but Informative**: Provide a clear, focused overview of the repository that emphasizes the most common
+     actions OpenHands will need to perform.

-2. **Repository Structure**: Explain the key directories and their purposes, especially highlighting where different types of code (e.g., frontend, backend) are located.
+2. **Repository Structure**: Explain the key directories and their purposes, especially highlighting where different
+     types of code (e.g., frontend, backend) are located.

 3. **Development Workflows**: Document the essential commands for:

--- a/docs/modules/usage/how-to/gui-mode.md
+++ b/docs/modules/usage/how-to/gui-mode.md
@@ -23,10 +23,75 @@ OpenHands provides a user-friendly Graphical User Interface (GUI) mode for inter

 OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:

-1. Locally (OSS): The user directly inputs their GitHub token.
-2. Online (SaaS): The token is obtained through GitHub OAuth authentication.
+1. **Locally (OSS)**: The user directly inputs their GitHub token
+2. **Online (SaaS)**: The token is obtained through GitHub OAuth authentication

-When you reach the `/app` route, the app checks if a token is present. If it finds one, it sets it in the environment for the agent to use.
+#### Setting Up a Local GitHub Token
+
+1. **Generate a Personal Access Token (PAT)**:
+   - Go to GitHub Settings > Developer Settings > Personal Access Tokens > Tokens (classic)
+   - Click "Generate new token (classic)"
+   - Required scopes:
+     - `repo` (Full control of private repositories)
+     - `workflow` (Update GitHub Action workflows)
+     - `read:org` (Read organization data)
+
+2. **Enter Token in OpenHands**:
+   - Click the Settings button (gear icon) in the top right
+   - Navigate to the "GitHub" section
+   - Paste your token in the "GitHub Token" field
+   - Click "Save" to apply the changes
+
+#### Organizational Token Policies
+
+If you're working with organizational repositories, additional setup may be required:
+
+1. **Check Organization Requirements**:
+   - Organization admins may enforce specific token policies
+   - Some organizations require tokens to be created with SSO enabled
+   - Review your organization's [token policy settings](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization)
+
+2. **Verify Organization Access**:
+   - Go to your token settings on GitHub
+   - Look for the organization under "Organization access"
+   - If required, click "Enable SSO" next to your organization
+   - Complete the SSO authorization process
+
+#### OAuth Authentication (Online Mode)
+
+When using OpenHands in online mode, the GitHub OAuth flow:
+
+1. Requests the following permissions:
+   - Repository access (read/write)
+   - Workflow management
+   - Organization read access
+
+2. Authentication steps:
+   - Click "Sign in with GitHub" when prompted
+   - Review the requested permissions
+   - Authorize OpenHands to access your GitHub account
+   - If using an organization, authorize organization access if prompted
+
+#### Troubleshooting
+
+Common issues and solutions:
+
+1. **Token Not Recognized**:
+   - Ensure the token is properly saved in settings
+   - Check that the token hasn't expired
+   - Verify the token has the required scopes
+   - Try regenerating the token
+
+2. **Organization Access Denied**:
+   - Check if SSO is required but not enabled
+   - Verify organization membership
+   - Contact organization admin if token policies are blocking access
+
+3. **Verifying Token Works**:
+   - The app will show a green checkmark if the token is valid
+   - Try accessing a repository to confirm permissions
+   - Check the browser console for any error messages
+   - Use the "Test Connection" button in settings if available

 ### Advanced Settings

--- a/docs/modules/usage/how-to/headless-mode.md
+++ b/docs/modules/usage/how-to/headless-mode.md
@@ -44,7 +44,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -54,6 +54,6 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.15 \
-    python -m openhands.core.main -t "write a bash script that prints hi"
+    docker.all-hands.dev/all-hands-ai/openhands:0.16 \
+    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
--- a/docs/modules/usage/how-to/openshift-example.md
+++ b/docs/modules/usage/how-to/openshift-example.md
@@ -1,429 +0,0 @@
-# Kubernetes
-
-There are different ways you might run OpenHands on Kubernetes or OpenShift. This guide goes through one possible way:
-1. Create a PV "as a cluster admin" to map workspace_base data and docker directory to the pod through the worker node
-2. Create a PVC to be able to mount those PVs to the pod
-3. Create a pod which contains two containers; the OpenHands and Sandbox containers
-
-## Detailed Steps for the Example Above
-
-> Note: Make sure you are logged in to the cluster first with the proper account for each step. PV creation requires cluster administrator!
-
-> Make sure you have read/write permissions on the hostPath used below (i.e. /tmp/workspace)
-
-1. Create the PV:
-Sample yaml file below can be used by a cluster admin to create the PV.
- workspace-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: workspace-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /tmp/workspace
-```
-
-```bash
-# apply yaml file
-$ oc create -f workspace-pv.yaml
-persistentvolume/workspace-pv created
-
-# review:
-$ oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
- docker-pv.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: docker-pv
-spec:
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteOnce
-  persistentVolumeReclaimPolicy: Retain
-  hostPath:
-    path: /var/run/docker.sock
-```
-
-```bash
-# apply yaml file
-$ oc create -f docker-pv.yaml
-persistentvolume/docker-pv created
-
-# review:
-oc get pv
-NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS      CLAIM                STORAGECLASS     REASON   AGE
-docker-pv                                  2Gi        RWO            Retain           Available                                                  6m55s
-workspace-pv                               2Gi        RWO            Retain           Available                                                  7m23s
-```
-
-2. Create the PVC:
-Sample PVC yaml file below:
-
- workspace-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: workspace-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# create the pvc
-$ oc create -f workspace-pvc.yaml
-persistentvolumeclaim/workspace-pvc created
-
-# review
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-workspace-pvc   Pending                                      hcloud-volumes   4s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-8s          Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
- docker-pvc.yaml
-
-```yamlfile
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: docker-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-```
-
-```bash
-# create pvc
-$ oc create -f docker-pvc.yaml
-persistentvolumeclaim/docker-pvc created
-
-# review
-$ oc get pvc
-NAME            STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Pending                                      hcloud-volumes   4s
-workspace-pvc   Pending                                      hcloud-volumes   2m53s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                 OBJECT                                MESSAGE
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-10s         Normal   WaitForFirstConsumer   persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-```
-
-3. Create the pod yaml file:
-Sample pod yaml file below:
-
- pod.yaml
-
-```yamlfile
-apiVersion: v1
-kind: Pod
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  containers:
-  - name: openhands-app-2024
-    image: docker.all-hands.dev/all-hands-ai/openhands:main
-    env:
-    - name: SANDBOX_USER_ID
-      value: "1000"
-    - name: WORKSPACE_MOUNT_PATH
-      value: "/opt/workspace_base"
-    volumeMounts:
-    - name: workspace-volume
-      mountPath: /opt/workspace_base
-    - name: docker-sock
-      mountPath: /var/run/docker.sock
-    ports:
-    - containerPort: 3000
-  - name: openhands-sandbox-2024
-    image: docker.all-hands.dev/all-hands-ai/runtime:main
-    ports:
-    - containerPort: 51963
-    command: ["/usr/sbin/sshd", "-D", "-p 51963", "-o", "PermitRootLogin=yes"]
-  volumes:
-  - name: workspace-volume
-    persistentVolumeClaim:
-      claimName: workspace-pvc
-  - name: docker-sock
-    persistentVolumeClaim:
-      claimName: docker-pvc
-```
-
-
-```bash
-# create the pod
-$ oc create -f pod.yaml
-W0716 11:22:07.776271  107626 warnings.go:70] would violate PodSecurity "restricted:v1.24": allowPrivilegeEscalation != false (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.runAsNonRoot=true), seccompProfile (pod or containers "openhands-app-2024", "openhands-sandbox-2024" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
-pod/openhands-app-2024 created
-
-# Above warning can be ignored for now as we will not modify SCC restrictions.
-
-# review
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   0/2     Pending   0          5s
-
-$ oc get pods
-NAME                 READY   STATUS              RESTARTS   AGE
-openhands-app-2024   0/2     ContainerCreating   0          15s
-
-$ oc get events
-LAST SEEN   TYPE     REASON                   OBJECT                                MESSAGE
-38s         Normal   WaitForFirstConsumer     persistentvolumeclaim/docker-pvc      waiting for first consumer to be created before binding
-23s         Normal   ExternalProvisioning     persistentvolumeclaim/docker-pvc      waiting for a volume to be created, either by external provisioner "csi.hetzner.cloud" or manually created by system administrator
-27s         Normal   Provisioning             persistentvolumeclaim/docker-pvc      External provisioner is provisioning volume for claim "openhands/docker-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/docker-pvc      Successfully provisioned volume pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252
-16s         Normal   Scheduled                pod/openhands-app-2024                Successfully assigned All-Hands-AI/OpenHands-app-2024 to worker1.hub.internal.blakane.com
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252"
-9s          Normal   SuccessfulAttachVolume   pod/openhands-app-2024                AttachVolume.Attach succeeded for volume "pvc-31f15b25-faad-4665-a25f-201a530379af"
-6s          Normal   AddedInterface           pod/openhands-app-2024                Add eth0 [10.128.2.48/23] from openshift-sdn
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "docker.all-hands.dev/all-hands-ai/openhands:main" already present on machine
-6s          Normal   Created                  pod/openhands-app-2024                Created container openhands-app-2024
-6s          Normal   Started                  pod/openhands-app-2024                Started container openhands-app-2024
-6s          Normal   Pulled                   pod/openhands-app-2024                Container image "docker.all-hands.dev/all-hands-ai/sandbox:main" already present on machine
-5s          Normal   Created                  pod/openhands-app-2024                Created container openhands-sandbox-2024
-5s          Normal   Started                  pod/openhands-app-2024                Started container openhands-sandbox-2024
-83s         Normal   WaitForFirstConsumer     persistentvolumeclaim/workspace-pvc   waiting for first consumer to be created before binding
-27s         Normal   Provisioning             persistentvolumeclaim/workspace-pvc   External provisioner is provisioning volume for claim "openhands/workspace-pvc"
-17s         Normal   ProvisioningSucceeded    persistentvolumeclaim/workspace-pvc   Successfully provisioned volume pvc-31f15b25-faad-4665-a25f-201a530379af
-
-$ oc get pods
-NAME                 READY   STATUS    RESTARTS   AGE
-openhands-app-2024   2/2     Running   0          23s
-
-$ oc get pvc
-NAME            STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS     AGE
-docker-pvc      Bound    pvc-2b1d223a-1c8f-4990-8e3d-68061a9ae252   10Gi       RWO            hcloud-volumes   10m
-workspace-pvc   Bound    pvc-31f15b25-faad-4665-a25f-201a530379af   10Gi       RWO            hcloud-volumes   13m
-
-```
-
-4. Create a NodePort service.
-Sample service creation command below:
-
-```bash
-# create the service of type NodePort
-$ oc create svc nodeport  openhands-app-2024  --tcp=3000:3000
-service/openhands-app-2024 created
-
-# review
-
-$ oc get svc
-NAME                 TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
-openhands-app-2024   NodePort   172.30.225.42   <none>        3000:30495/TCP   4s
-
-$ oc describe svc openhands-app-2024
-Name:                     openhands-app-2024
-Namespace:                openhands
-Labels:                   app=openhands-app-2024
-Annotations:              <none>
-Selector:                 app=openhands-app-2024
-Type:                     NodePort
-IP Family Policy:         SingleStack
-IP Families:              IPv4
-IP:                       172.30.225.42
-IPs:                      172.30.225.42
-Port:                     3000-3000  3000/TCP
-TargetPort:               3000/TCP
-NodePort:                 3000-3000  30495/TCP
-Endpoints:                10.128.2.48:3000
-Session Affinity:         None
-External Traffic Policy:  Cluster
-Events:                   <none>
-```
-
-6. Connect to OpenHands UI, configure the Agent, then test:
-
-![image](https://github.com/user-attachments/assets/12f94804-a0c7-4744-b873-e003c9caf40e)
-
-
-
-## GCP GKE Openhands deployment
-
-**Warning**: this deployment grants the OpenHands application access to the Kubernetes docker socket, which creates security risk. Use at your own discretion.
-1- Create policy for privillege access
-2- Create gke credentials(optional)
-3- Create openhands deployment
-4- Verification and ui access commands
-5- Tshoot pod to verify the internal container
-
-1. create policy for privillege access
-```bash
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: privileged-role
-rules:
- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: ["apps"]
-  resources: ["deployments"]
-  verbs: ["create", "get", "list", "watch", "delete"]
- apiGroups: [""]
-  resources: ["pods/exec"]
-  verbs: ["create"]
- apiGroups: [""]
-  resources: ["pods/log"]
-  verbs: ["get"]
---
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: privileged-role-binding
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: privileged-role
-subjects:
- kind: ServiceAccount
-  name: default  # Change to your service account name
-  namespace: default
-```
-2. create gke credentials(optional)
-```bash
-kubectl create secret generic google-cloud-key \
-  --from-file=key.json=/path/to/your/google-cloud-key.json
-  ```
-3. create openhands deployment
-## as this is tested for the single worker node if you have multiple specify the flag for the single worker
-
-```bash
-kind: Deployment
-metadata:
-  name: openhands-app-2024
-  labels:
-    app: openhands-app-2024
-spec:
-  replicas: 1  # You can increase this number for multiple replicas
-  selector:
-    matchLabels:
-      app: openhands-app-2024
-  template:
-    metadata:
-      labels:
-        app: openhands-app-2024
-    spec:
-      containers:
-      - name: openhands-app-2024
-        image: docker.all-hands.dev/all-hands-ai/openhands:main
-        env:
-        - name: SANDBOX_USER_ID
-          value: "1000"
-        - name: SANDBOX_API_HOSTNAME
-          value: '10.164.0.4'
-        - name: WORKSPACE_MOUNT_PATH
-          value: "/tmp/workspace_base"
-        - name: GOOGLE_APPLICATION_CREDENTIALS
-          value: "/tmp/workspace_base/google-cloud-key.json"
-        volumeMounts:
-        - name: workspace-volume
-          mountPath: /tmp/workspace_base
-        - name: docker-sock
-          mountPath: /var/run/docker.sock
-        - name: google-credentials
-          mountPath: "/tmp/workspace_base/google-cloud-key.json"
-        securityContext:
-          privileged: true  # Add this to allow privileged access
-        ports:
-        - containerPort: 3000
-      - name: openhands-sandbox-2024
-        image: docker.all-hands.dev/all-hands-ai/runtime:main
-    #    securityContext:
-    #      privileged: true  # Add this to allow privileged access
-        ports:
-        - containerPort: 51963
-        command: ["/usr/sbin/sshd", "-D", "-p 51963", "-o", "PermitRootLogin=yes"]
-      volumes:
-      #- name: workspace-volume
-      #  persistentVolumeClaim:
-      #    claimName: workspace-pvc
-      - name: workspace-volume
-        emptyDir: {}
-      - name: docker-sock
-        hostPath:
-          path: /var/run/docker.sock       # Use host's Docker socket
-          type: Socket
-      - name: google-credentials
-        secret:
-          secretName: google-cloud-key
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: openhands-app-2024-svc
-spec:
-  selector:
-    app: openhands-app-2024
-  ports:
-  - name: http
-    protocol: TCP
-    port: 80
-    targetPort: 3000
-  - name: ssh
-    protocol: TCP
-    port: 51963
-    targetPort: 51963
-  type: LoadBalancer
-  ```
-
-5. Tshoot pod to verify the internal container
-### if you want to know more regarding the internal container runtime use below mention pod deployment use kubectl exec -it to enter into container and you can check the contaienr run time using normal docker commands like "docker ps -a"
-
-```bash
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: docker-in-docker
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: docker-in-docker
-  template:
-    metadata:
-      labels:
-        app: docker-in-docker
-    spec:
-      containers:
-      - name: dind
-        image: docker:20.10-dind
-        securityContext:
-          privileged: true
-        volumeMounts:
-        - name: docker-sock
-          mountPath: /var/run/docker.sock
-      volumes:
-      - name: docker-sock
-        hostPath:
-          path: /var/run/docker.sock
-          type: Socket
-```
--- a/docs/modules/usage/how-to/persist-session-data.md
+++ b/docs/modules/usage/how-to/persist-session-data.md
@@ -0,0 +1,16 @@
+# Persisting Session Data
+
+With the standard installation, session data is stored in memory. Currently, if the OpenHands service is restarted,
+previous sessions become invalid (a new secret is generated) and are thus not recoverable.
+
+## How to Persist Session Data
+
+### Development Workflow
+In the `config.toml` file, specify the following:
+```
+[core]
+...
+file_store="local"
+file_store_path="/absolute/path/to/openhands/cache/directory"
+jwt_secret="secretpass"
+```
--- a/docs/modules/usage/installation.mdx
+++ b/docs/modules/usage/installation.mdx
@@ -11,16 +11,16 @@
 The easiest way to run OpenHands is in Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.15
+    docker.all-hands.dev/all-hands-ai/openhands:0.16
 ```

 You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), or using the [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action).
--- a/docs/modules/usage/micro-agents.md
+++ b/docs/modules/usage/micro-agents.md
@@ -0,0 +1,213 @@
+# Micro-Agents
+
+OpenHands uses specialized micro-agents to handle specific tasks and contexts efficiently. These micro-agents are small, focused components that provide specialized behavior and knowledge for particular scenarios.
+
+## Overview
+
+Micro-agents are defined in markdown files under the `openhands/agenthub/codeact_agent/micro/` directory. Each micro-agent is configured with:
+
+- A unique name
+- The agent type (typically CodeActAgent)
+- Trigger keywords that activate the agent
+- Specific instructions and capabilities
+
+## Available Micro-Agents
+
+### GitHub Agent
+**File**: `github.md`
+**Triggers**: `github`, `git`
+
+The GitHub agent specializes in GitHub API interactions and repository management. It:
+- Has access to a `GITHUB_TOKEN` for API authentication
+- Follows strict guidelines for repository interactions
+- Handles branch management and pull requests
+- Uses the GitHub API instead of web browser interactions
+
+Key features:
+- Branch protection (prevents direct pushes to main/master)
+- Automated PR creation
+- Git configuration management
+- API-first approach for GitHub operations
+
+### NPM Agent
+**File**: `npm.md`
+**Triggers**: `npm`
+
+Specializes in handling npm package management with specific focus on:
+- Non-interactive shell operations
+- Automated confirmation handling using the Unix `yes` command
+- Package installation automation
+
+### Custom Micro-Agents
+
+You can create your own micro-agents by adding new markdown files to the micro-agents directory. Each file should follow this structure:
+
+```markdown
+---
+name: agent_name
+agent: CodeActAgent
+triggers:
+- trigger_word1
+- trigger_word2
+---
+
+Instructions and capabilities for the micro-agent...
+```
+
+## Best Practices
+
+When working with micro-agents:
+
+1. **Use Appropriate Triggers**: Ensure your commands include the relevant trigger words to activate the correct micro-agent
+2. **Follow Agent Guidelines**: Each agent has specific instructions and limitations - respect these for optimal results
+3. **API-First Approach**: When available, use API endpoints rather than web interfaces
+4. **Automation Friendly**: Design commands that work well in non-interactive environments
+
+## Integration
+
+Micro-agents are automatically integrated into OpenHands' workflow. They:
+- Monitor incoming commands for their trigger words
+- Activate when relevant triggers are detected
+- Apply their specialized knowledge and capabilities
+- Follow their specific guidelines and restrictions
+
+## Example Usage
+
+```bash
+# GitHub agent example
+git checkout -b feature-branch
+git commit -m "Add new feature"
+git push origin feature-branch
+
+# NPM agent example
+yes | npm install package-name
+```
+
+For more information about specific agents, refer to their individual documentation files in the micro-agents directory.
+
+## Contributing a Micro-Agent
+
+To contribute a new micro-agent to OpenHands, follow these guidelines:
+
+### 1. Planning Your Micro-Agent
+
+Before creating a micro-agent, consider:
+- What specific problem or use case will it address?
+- What unique capabilities or knowledge should it have?
+- What trigger words make sense for activating it?
+- What constraints or guidelines should it follow?
+
+### 2. File Structure
+
+Create a new markdown file in `openhands/agenthub/codeact_agent/micro/` with a descriptive name (e.g., `docker.md` for a Docker-focused agent).
+
+### 3. Required Components
+
+Your micro-agent file must include:
+
+1. **Front Matter**: YAML metadata at the start of the file:
+```markdown
+---
+name: your_agent_name
+agent: CodeActAgent
+triggers:
+- trigger_word1
+- trigger_word2
+---
+```
+
+2. **Instructions**: Clear, specific guidelines for the agent's behavior:
+```markdown
+You are responsible for [specific task/domain].
+
+Key responsibilities:
+1. [Responsibility 1]
+2. [Responsibility 2]
+
+Guidelines:
+- [Guideline 1]
+- [Guideline 2]
+
+Examples of usage:
+[Example 1]
+[Example 2]
+```
+
+### 4. Best Practices for Micro-Agent Development
+
+1. **Clear Scope**: Keep the agent focused on a specific domain or task
+2. **Explicit Instructions**: Provide clear, unambiguous guidelines
+3. **Useful Examples**: Include practical examples of common use cases
+4. **Safety First**: Include necessary warnings and constraints
+5. **Integration Awareness**: Consider how the agent interacts with other components
+
+### 5. Testing Your Micro-Agent
+
+Before submitting:
+1. Test the agent with various prompts
+2. Verify trigger words activate the agent correctly
+3. Ensure instructions are clear and comprehensive
+4. Check for potential conflicts with existing agents
+
+### 6. Example Implementation
+
+Here's a template for a new micro-agent:
+
+````markdown
+---
+name: docker
+agent: CodeActAgent
+triggers:
+- docker
+- container
+---
+
+You are responsible for Docker container management and Dockerfile creation.
+
+Key responsibilities:
+1. Create and modify Dockerfiles
+2. Manage container lifecycle
+3. Handle Docker Compose configurations
+
+Guidelines:
+- Always use official base images when possible
+- Include necessary security considerations
+- Follow Docker best practices for layer optimization
+
+Examples:
+1. Creating a Dockerfile:
+   ```dockerfile
+   FROM node:18-alpine
+   WORKDIR /app
+   COPY package*.json ./
+   RUN npm install
+   COPY . .
+   CMD ["npm", "start"]
+   ```
+
+2. Docker Compose usage:
+   ```yaml
+   version: '3'
+   services:
+     web:
+       build: .
+       ports:
+         - "3000:3000"
+   ```
+
+Remember to:
+- Validate Dockerfile syntax
+- Check for security vulnerabilities
+- Optimize for build time and image size
+````
+
+### 7. Submission Process
+
+1. Create your micro-agent file in the correct directory
+2. Test thoroughly
+3. Submit a pull request with:
+   - The new micro-agent file
+   - Updated documentation if needed
+   - Description of the agent's purpose and capabilities
+
+Remember that micro-agents are a powerful way to extend OpenHands' capabilities in specific domains. Well-designed agents can significantly improve the system's ability to handle specialized tasks.
--- a/docs/modules/usage/prompting-best-practices.md
+++ b/docs/modules/usage/prompting-best-practices.md
@@ -2,6 +2,11 @@

 When working with OpenHands AI software developer, it's crucial to provide clear and effective prompts. This guide outlines best practices for creating prompts that will yield the most accurate and useful responses.

+## Table of Contents
+
+- [Characteristics of Good Prompts](#characteristics-of-good-prompts)
+- [Customizing Prompts for your Project](#customizing-prompts-for-your-project)
+
 ## Characteristics of Good Prompts

 Good prompts are:
@@ -39,3 +44,63 @@ Good prompts are:
 Remember, the more precise and informative your prompt is, the better the AI can assist you in developing or modifying the OpenHands software.

 See [Getting Started with OpenHands](./getting-started) for more examples of helpful prompts.
+
+## Customizing Prompts for your Project
+
+OpenHands can be customized to work more effectively with specific repositories by providing repository-specific context and guidelines. This section explains how to optimize OpenHands for your project.
+
+### Repository Configuration
+
+You can customize OpenHands' behavior for your repository by creating a `.openhands_instructions` file in your repository's root directory. This file should contain:
+
+1. **Repository Overview**: A brief description of your project's purpose and architecture
+2. **Directory Structure**: Key directories and their purposes
+3. **Development Guidelines**: Project-specific coding standards and practices
+4. **Testing Requirements**: How to run tests and what types of tests are required
+5. **Setup Instructions**: Steps needed to build and run the project
+
+Example `.openhands_instructions` file:
+```
+Repository: MyProject
+Description: A web application for task management
+
+Directory Structure:
+- src/: Main application code
+- tests/: Test files
+- docs/: Documentation
+
+Setup:
+- Run `npm install` to install dependencies
+- Use `npm run dev` for development
+- Run `npm test` for testing
+
+Guidelines:
+- Follow ESLint configuration
+- Write tests for all new features
+- Use TypeScript for new code
+```
+
+### Customizing Prompts
+
+When working with a customized repository:
+
+1. **Reference Project Standards**: Mention specific coding standards or patterns used in your project
+2. **Include Context**: Reference relevant documentation or existing implementations
+3. **Specify Testing Requirements**: Include project-specific testing requirements in your prompts
+
+Example customized prompt:
+```
+Add a new task completion feature to src/components/TaskList.tsx following our existing component patterns.
+Include unit tests in tests/components/ and update the documentation in docs/features/.
+The component should use our shared styling from src/styles/components.
+```
+
+### Best Practices for Repository Customization
+
+1. **Keep Instructions Updated**: Regularly update your `.openhands_instructions` file as your project evolves
+2. **Be Specific**: Include specific paths, patterns, and requirements unique to your project
+3. **Document Dependencies**: List all tools and dependencies required for development
+4. **Include Examples**: Provide examples of good code patterns from your project
+5. **Specify Conventions**: Document naming conventions, file organization, and code style preferences
+
+By customizing OpenHands for your repository, you'll get more accurate and consistent results that align with your project's standards and requirements.
--- a/docs/modules/usage/runtimes.md
+++ b/docs/modules/usage/runtimes.md
@@ -16,7 +16,7 @@ some flags being passed to `docker run` that make this possible:

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.15-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.16-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
@@ -28,12 +28,22 @@ You can also [build your own runtime image](how-to/custom-sandbox-guide).
 ### Connecting to Your filesystem
 One useful feature here is the ability to connect to your local filesystem.

-To mount your filesystem into the runtime, add the following options to
-the `docker run` command:
-
+To mount your filesystem into the runtime, first set WORKSPACE_BASE:
 ```bash
 export WORKSPACE_BASE=/path/to/your/code

+# Linux and Mac Example
+# export WORKSPACE_BASE=$HOME/OpenHands
+# Will set $WORKSPACE_BASE to /home/<username>/OpenHands
+#
+# WSL on Windows Example
+# export WORKSPACE_BASE=/mnt/c/dev/OpenHands
+# Will set $WORKSPACE_BASE to C:\dev\OpenHands
+```
+
+then add the following options to the `docker run` command:
+
+```bash
 docker run # ...
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
--- a/docs/modules/usage/troubleshooting/troubleshooting.md
+++ b/docs/modules/usage/troubleshooting/troubleshooting.md
@@ -1,180 +1,44 @@
 # 🚧 Troubleshooting

-There are some error messages that frequently get reported by users.
-We'll try to make the install process easier, but for now you can look for your error message below and see if there are any workarounds.
-If you find more information or a workaround for one of these issues, please open a *PR* to add details to this file.
-
 :::tip
-OpenHands only supports Windows via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Please be sure to run all commands inside your WSL terminal.
-Check out [Notes for WSL on Windows Users](troubleshooting/windows) for some troubleshooting guides.
+OpenHands only supports Windows via WSL. Please be sure to run all commands inside your WSL terminal.
 :::

-## Common Issues
+### Launch docker client failed

-* [Unable to connect to Docker](#unable-to-connect-to-docker)
-* [404 Resource not found](#404-resource-not-found)
-* [`make build` getting stuck on package installations](#make-build-getting-stuck-on-package-installations)
-* [Sessions are not restored](#sessions-are-not-restored)
-* [Connection to host.docker.internal timed out](#connection-to-host-docker-internal-timed-out)
+**Description**

-### Unable to connect to Docker
-
-[GitHub Issue](https://github.com/All-Hands-AI/OpenHands/issues/1226)
-
-**Symptoms**
-
-```bash
-Error creating controller. Please check Docker is running and visit `https://docs.all-hands.dev/modules/usage/troubleshooting` for more debugging information.
+When running OpenHands, the following error is seen:
+```
+Launch docker client failed. Please make sure you have installed docker and started docker desktop/daemon.
 ```

-```bash
-docker.errors.DockerException: Error while fetching server API version: ('Connection aborted.', FileNotFoundError(2, 'No such file or directory'))
-```
-
-**Details**
-
-OpenHands uses a Docker container to do its work safely, without potentially breaking your machine.
-
-**Workarounds**
-
-* Run `docker ps` to ensure that docker is running
-* Make sure you don't need `sudo` to run docker [see here](https://www.baeldung.com/linux/docker-run-without-sudo)
-* If you are on a Mac, check the [permissions requirements](https://docs.docker.com/desktop/mac/permission-requirements/) and in particular consider enabling the `Allow the default Docker socket to be used` under `Settings > Advanced` in Docker Desktop.
-* In addition, upgrade your Docker to the latest version under `Check for Updates`
+**Resolution**

+Try these in order:
+* Confirm `docker` is running on your system. You should be able to run `docker ps` in the terminal successfully.
+* If using Docker Desktop, ensure `Settings > Advanced > Allow the default Docker socket to be used` is enabled.
+* Depending on your configuration you may need `Settings > Resources > Network > Enable host networking` enabled in Docker Desktop.
+* Reinstall Docker Desktop.
 ---
-### `404 Resource not found`

-**Symptoms**
+# Development Workflow Specific
+### Error building runtime docker image

-```python
-Traceback (most recent call last):
-  File "/app/.venv/lib/python3.12/site-packages/litellm/llms/openai.py", line 414, in completion
-    raise e
-  File "/app/.venv/lib/python3.12/site-packages/litellm/llms/openai.py", line 373, in completion
-    response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_utils/_utils.py", line 277, in wrapper
-    return func(*args, **kwargs)
-           ^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/resources/chat/completions.py", line 579, in create
-    return self._post(
-           ^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 1232, in post
-    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
-                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 921, in request
-    return self._request(
-           ^^^^^^^^^^^^^^
-  File "/app/.venv/lib/python3.12/site-packages/openai/_base_client.py", line 1012, in _request
-    raise self._make_status_error_from_response(err.response) from None
-openai.NotFoundError: Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}
+**Description**
+
+Attempts to start a new session fail, and errors with terms like the following appear in the logs:
+```
+debian-security bookworm-security
+InRelease At least one invalid signature was encountered.
 ```

-**Details**
+This seems to happen when the hash of an existing external library changes and your local docker instance has
+cached a previous version. To work around this, please try the following:

-This happens when LiteLLM (our library for connecting to different LLM providers) can't find
-the API endpoint you're trying to connect to. Most often this happens for Azure or ollama users.
-
-**Workarounds**
-
-* Check that you've set `LLM_BASE_URL` properly
-* Check that the model is set properly, based on the [LiteLLM docs](https://docs.litellm.ai/docs/providers)
-  * If you're running inside the UI, be sure to set the `model` in the settings modal
-  * If you're running headless (via main.py) be sure to set `LLM_MODEL` in your env/config
-* Make sure you've followed any special instructions for your LLM provider
-  * [Azure](/modules/usage/llms/azure-llms)
-  * [Google](/modules/usage/llms/google-llms)
-* Make sure your API key is correct
-* See if you can connect to the LLM using `curl`
-* Try [connecting via LiteLLM directly](https://github.com/BerriAI/litellm) to test your setup
-
---
-### `make build` getting stuck on package installations
-
-**Symptoms**
-
-Package installation stuck on `Pending...` without any error message:
-
-```bash
-Package operations: 286 installs, 0 updates, 0 removals
-
-  - Installing certifi (2024.2.2): Pending...
-  - Installing h11 (0.14.0): Pending...
-  - Installing idna (3.7): Pending...
-  - Installing sniffio (1.3.1): Pending...
-  - Installing typing-extensions (4.11.0): Pending...
-```
-
-**Details**
-
-In rare cases, `make build` can seemingly get stuck on package installations
-without any error message.
-
-**Workarounds**
-
-The package installer Poetry may miss a configuration setting for where credentials are to be looked up (keyring).
-
-First check with `env` if a value for `PYTHON_KEYRING_BACKEND` exists.
-If not, run the below command to set it to a known value and retry the build:
-
-```bash
-export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
-```
-
---
-### Sessions are not restored
-
-**Symptoms**
-
-OpenHands usually asks whether to resume or start a new session when opening the UI.
-But clicking "Resume" still starts a fresh new chat.
-
-**Details**
-
-With a standard installation as of today session data is stored in memory.
-Currently, if OpenHands's service is restarted, previous sessions become
-invalid (a new secret is generated) and thus not recoverable.
-
-**Workarounds**
-
-* Change configuration to make sessions persistent by editing the `config.toml`
-file (in OpenHands's root folder) by specifying a `file_store` and an
-absolute `file_store_path`:
-
-```toml
-file_store="local"
-file_store_path="/absolute/path/to/openhands/cache/directory"
-```
-
-* Add a fixed jwt secret in your .bashrc, like below, so that previous session id's
-should stay accepted.
-
-```bash
-EXPORT JWT_SECRET=A_CONST_VALUE
-```
-
---
-### Connection to host docker internal timed out
-
-**Symptoms**
-
-When you start the server using the docker command from the main [README](https://github.com/All-Hands-AI/OpenHands/README.md), you get a long timeout
-followed by the a stack trace containing messages like:
-
-* `Connection to host.docker.internal timed out. (connect timeout=310)`
-* `Max retries exceeded with url: /alive`
-
-**Details**
-
-If Docker Engine is installed rather than Docker Desktop, the main command will not work as expected.
-Docker Desktop includes easy DNS configuration for connecting processes running in different containers
-which OpenHands makes use of when the main server is running inside a docker container.
-(Further details: https://forums.docker.com/t/difference-between-docker-desktop-and-docker-engine/124612)
-
-**Workarounds**
-
-* [Install Docker Desktop](https://www.docker.com/products/docker-desktop/)
-* Run OpenHands in [Development Mode](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md),
-  So that the main server is not run inside a container, but still creates dockerized runtime sandboxes.
+* Stop any containers where the name has the prefix `openhands-runtime-` :
+  `docker ps --filter name=openhands-runtime- --filter status=running -aq | xargs docker stop`
+* Remove any images where the name has the prefix `openhands-runtime-` :
+  `docker rmi $(docker images --filter name=openhands-runtime- -q --no-trunc)`
+* Stop and Remove any containers / images where the name has the prefix `openhands-runtime-`
+* Prune containers / images : `docker container prune -f && docker image prune -f`
--- a/docs/modules/usage/troubleshooting/windows.md
+++ b/docs/modules/usage/troubleshooting/windows.md
@@ -1,64 +0,0 @@
-# Notes for WSL on Windows Users
-
-OpenHands only supports Windows via [WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
-Please be sure to run all commands inside your WSL terminal.
-
-## Troubleshooting
-
-### Recommendation: Do not run as root user
-
-For security reasons, it is highly recommended to not run OpenHands as the root user, but a user with a non-zero UID.
-
-References:
-
-* [Why it is bad to login as root](https://askubuntu.com/questions/16178/why-is-it-bad-to-log-in-as-root)
-* [Set default user in WSL](https://www.tenforums.com/tutorials/128152-set-default-user-windows-subsystem-linux-distro-windows-10-a.html#option2)
-Hint about the 2nd reference: for Ubuntu users, the command could actually be "ubuntupreview" instead of "ubuntu".
-
---
-### Error: 'docker' could not be found in this WSL 2 distro.
-
-If you are using Docker Desktop, make sure to start it before calling any docker command from inside WSL.
-Docker also needs to have the WSL integration option activated.
-
---
-### Poetry Installation
-
-* If you face issues running Poetry even after installing it during the build process, you may need to add its binary path to your environment:
-
-```sh
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-* If make build stops on an error like this:
-
-```sh
-ModuleNotFoundError: no module named <module-name>
-```
-
-This could be an issue with Poetry's cache.
-Try to run these 2 commands after another:
-
-```sh
-rm -r ~/.cache/pypoetry
-make build
-```
-
---
-### NoneType object has no attribute 'request'
-
-If you are experiencing issues related to networking, such as `NoneType object has no attribute 'request'` when executing `make run`, you may need to configure your WSL2 networking settings. Follow these steps:
-
-* Open or create the `.wslconfig` file located at `C:\Users\%username%\.wslconfig` on your Windows host machine.
-* Add the following configuration to the `.wslconfig` file:
-
-```sh
-[wsl2]
-networkingMode=mirrored
-localhostForwarding=true
-```
-
-* Save the `.wslconfig` file.
-* Restart WSL2 completely by exiting any running WSL2 instances and executing the command `wsl --shutdown` in your command prompt or terminal.
-* After restarting WSL, attempt to execute `make run` again.
-The networking issue should be resolved.
--- a/docs/modules/usage/upgrade-guide.md
+++ b/docs/modules/usage/upgrade-guide.md
@@ -1,71 +0,0 @@
-# ⬆️ Upgrade Guide
-
-## 0.8.0 (2024-07-13)
-
-### Config breaking changes
-
-In this release we introduced a few breaking changes to backend configurations.
-If you have only been using OpenHands via frontend (web GUI), nothing needs
-to be taken care of.
-
-Here's a list of breaking changes in configs. They only apply to users who
-use OpenHands CLI via `main.py`. For more detail, see [#2756](https://github.com/All-Hands-AI/OpenHands/pull/2756).
-
-#### Removal of --model-name option from main.py
-
-Please note that `--model-name`, or `-m` option, no longer exists. You should set up the LLM
-configs in `config.toml` or via environmental variables.
-
-#### LLM config groups must be subgroups of 'llm'
-
-Prior to release 0.8, you can use arbitrary name for llm config in `config.toml`, e.g.
-
-```toml
-[gpt-4o]
-model="gpt-4o"
-api_key="<your_api_key>"
-```
-
-and then use `--llm-config` CLI argument to specify the desired LLM config group
-by name. This no longer works. Instead, the config group must be under `llm` group,
-e.g.:
-
-```toml
-[llm.gpt-4o]
-model="gpt-4o"
-api_key="<your_api_key>"
-```
-
-If you have a config group named `llm`, no need to change it, it will be used
-as the default LLM config group.
-
-#### 'agent' group no longer contains 'name' field
-
-Prior to release 0.8, you may or may not have a config group named `agent` that
-looks like this:
-
-```toml
-[agent]
-name="CodeActAgent"
-memory_max_threads=2
-```
-
-Note the `name` field is now removed. Instead, you should put `default_agent` field
-under `core` group, e.g.
-
-```toml
-[core]
-# other configs
-default_agent='CodeActAgent'
-
-[agent]
-llm_config='llm'
-memory_max_threads=2
-
-[agent.CodeActAgent]
-llm_config='gpt-4o'
-```
-
-Note that similar to `llm` subgroups, you can also define `agent` subgroups.
-Moreover, an agent can be associated with a specific LLM config group. For more
-detail, see the examples in `config.template.toml`.
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -14,11 +14,11 @@
        "@docusaurus/theme-mermaid": "^3.6.3",
        "@mdx-js/react": "^3.1.0",
        "clsx": "^2.0.0",
-        "prism-react-renderer": "^2.4.0",
+        "prism-react-renderer": "^2.4.1",
        "react": "^18.3.1",
        "react-dom": "^18.3.1",
        "react-icons": "^5.4.0",
-        "react-use": "^17.5.1"
+        "react-use": "^17.6.0"
      },
      "devDependencies": {
        "@docusaurus/module-type-aliases": "^3.5.1",
@@ -14781,9 +14781,9 @@
      }
    },
    "node_modules/prism-react-renderer": {
-      "version": "2.4.0",
-      "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.0.tgz",
-      "integrity": "sha512-327BsVCD/unU4CNLZTWVHyUHKnsqcvj2qbPlQ8MiBE2eq2rgctjigPA1Gp9HLF83kZ20zNN6jgizHJeEsyFYOw==",
+      "version": "2.4.1",
+      "resolved": "https://registry.npmjs.org/prism-react-renderer/-/prism-react-renderer-2.4.1.tgz",
+      "integrity": "sha512-ey8Ls/+Di31eqzUxC46h8MksNuGx/n0AAC8uKpwFau4RPDYLuE3EXTp8N8G2vX2N7UC/+IXeNUnlWBGGcAG+Ig==",
      "dependencies": {
        "@types/prismjs": "^1.26.0",
        "clsx": "^2.0.0"
@@ -15264,9 +15264,9 @@
      }
    },
    "node_modules/react-use": {
-      "version": "17.5.1",
-      "resolved": "https://registry.npmjs.org/react-use/-/react-use-17.5.1.tgz",
-      "integrity": "sha512-LG/uPEVRflLWMwi3j/sZqR00nF6JGqTTDblkXK2nzXsIvij06hXl1V/MZIlwj1OKIQUtlh1l9jK8gLsRyCQxMg==",
+      "version": "17.6.0",
+      "resolved": "https://registry.npmjs.org/react-use/-/react-use-17.6.0.tgz",
+      "integrity": "sha512-OmedEScUMKFfzn1Ir8dBxiLLSOzhKe/dPZwVxcujweSj45aNM7BEGPb9BEVIgVEqEXx6f3/TsXzwIktNgUR02g==",
      "dependencies": {
        "@types/js-cookie": "^2.2.6",
        "@xobotyi/scrollbar-width": "^1.9.5",
--- a/docs/package.json
+++ b/docs/package.json
@@ -21,11 +21,11 @@
    "@docusaurus/theme-mermaid": "^3.6.3",
    "@mdx-js/react": "^3.1.0",
    "clsx": "^2.0.0",
-    "prism-react-renderer": "^2.4.0",
+    "prism-react-renderer": "^2.4.1",
    "react": "^18.3.1",
    "react-dom": "^18.3.1",
    "react-icons": "^5.4.0",
-    "react-use": "^17.5.1"
+    "react-use": "^17.6.0"
  },
  "devDependencies": {
    "@docusaurus/module-type-aliases": "^3.5.1",
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -14,9 +14,20 @@ const sidebars: SidebarsConfig = {
      id: 'usage/getting-started',
    },
    {
-      type: 'doc',
-      label: 'Prompting Best Practices',
-      id: 'usage/prompting-best-practices',
+      type: 'category',
+      label: 'Prompting',
+      items: [
+        {
+          type: 'doc',
+          label: 'Best Practices',
+          id: 'usage/prompting-best-practices',
+        },
+        {
+          type: 'doc',
+          label: 'Micro-Agents',
+          id: 'usage/micro-agents',
+        },
+      ],
    },
    {
      type: 'category',
@@ -110,6 +121,11 @@ const sidebars: SidebarsConfig = {
          label: 'Custom Sandbox',
          id: 'usage/how-to/custom-sandbox-guide',
        },
+        {
+          type: 'doc',
+          label: 'Persist Session Data',
+          id: 'usage/how-to/persist-session-data',
+        },
      ],
    },
    {
@@ -152,11 +168,6 @@ const sidebars: SidebarsConfig = {
          label: 'Evaluation',
          id: 'usage/how-to/evaluation-harness',
        },
-        {
-          type: 'doc',
-          label: 'Kubernetes Deployment',
-          id: 'usage/how-to/openshift-example',
-        },
      ],
    },
    {
--- a/docs/src/components/CustomFooter.tsx
+++ b/docs/src/components/CustomFooter.tsx
@@ -8,7 +8,7 @@ function CustomFooter() {
    <footer className="custom-footer">
      <div className="footer-content">
        <div className="footer-icons">
-          <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg" target="_blank" rel="noopener noreferrer">
+          <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw" target="_blank" rel="noopener noreferrer">
            <FaSlack />
          </a>
          <a href="https://discord.gg/ESHStjSjD4" target="_blank" rel="noopener noreferrer">
--- a/docs/src/components/HomepageHeader/HomepageHeader.tsx
+++ b/docs/src/components/HomepageHeader/HomepageHeader.tsx
@@ -23,7 +23,7 @@ export function HomepageHeader() {
          <a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
          <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
          <br/>
-          <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
+          <a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2wkh4pklz-w~h_DVDtEe9H5kyQlcNxVw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
          <a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
          <a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
          <br/>
--- a/evaluation/README.md
+++ b/evaluation/README.md
@@ -42,7 +42,7 @@ temperature = 0.0

 ## Supported Benchmarks

-The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), and [miscellaneous assistance](#misc-assistance) tasks.
+The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.

 ### Software Engineering

@@ -73,6 +73,10 @@ The OpenHands evaluation harness supports a wide variety of benchmarks across [s
 - ProofWriter: [`evaluation/benchmarks/logic_reasoning`](./benchmarks/logic_reasoning)
 - ScienceAgentBench: [`evaluation/benchmarks/scienceagentbench`](./benchmarks/scienceagentbench)

+### Real World
+
+- TheAgentCompany: [`evaluation/benchmarks/the_agent_company`](./benchmarks/the_agent_company)
+
 ## Result Visualization

 Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -202,6 +202,9 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
+
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@@ -307,6 +307,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@@ -279,6 +279,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@@ -328,6 +328,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@@ -456,6 +456,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/browsing_delegation/run_infer.py
+++ b/evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -142,6 +142,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
--- a/evaluation/benchmarks/commit0_bench/run_infer.py
+++ b/evaluation/benchmarks/commit0_bench/run_infer.py
@@ -571,6 +571,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
        llm_config.log_completions = True

    if llm_config is None:
--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@@ -466,6 +466,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -238,6 +238,9 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
+
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -146,6 +146,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -326,6 +326,9 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
+
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@@ -285,6 +285,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -272,7 +272,7 @@ if __name__ == '__main__':
        default='ProofWriter',
    )
    parser.add_argument(
-        '--data_split',
+        '--data-split',
        type=str,
        help='data split to evaluate on {validation}',  # right now we only support validation split
        default='validation',
@@ -288,6 +288,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/miniwob/run_infer.py
+++ b/evaluation/benchmarks/miniwob/run_infer.py
@@ -231,6 +231,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -279,6 +279,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/ml_bench/run_analysis.py
+++ b/evaluation/benchmarks/ml_bench/run_analysis.py
@@ -124,6 +124,9 @@ if __name__ == '__main__':
    # for details of how to set `llm_config`
    if args.llm_config:
        specified_llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        specified_llm_config.modify_params = False
+
        if specified_llm_config:
            config.llm = specified_llm_config
    logger.info(f'Config for evaluation: {config}')
--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -292,6 +292,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/scienceagentbench/run_infer.py
+++ b/evaluation/benchmarks/scienceagentbench/run_infer.py
@@ -251,7 +251,7 @@ If the program uses some packages that are incompatible, please figure out alter
 if __name__ == '__main__':
    parser = get_parser()
    parser.add_argument(
-        '--use_knowledge',
+        '--use-knowledge',
        type=str,
        default='false',
        choices=['true', 'false'],
@@ -272,6 +272,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/scienceagentbench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/scienceagentbench/scripts/run_infer.sh
@@ -35,7 +35,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"
 COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
-  --use_knowledge $USE_KNOWLEDGE \
+  --use-knowledge $USE_KNOWLEDGE \
  --max-iterations 30 \
  --eval-num-workers $NUM_WORKERS \
  --eval-note $OPENHANDS_VERSION" \
--- a/evaluation/benchmarks/swe_bench/prompt.py
+++ b/evaluation/benchmarks/swe_bench/prompt.py
@@ -1,28 +0,0 @@
-CODEACT_SWE_PROMPT = """Now, you're going to solve this issue on your own. Your terminal session has started and you're in the repository's root directory. You can use any bash commands or the special interface to help you. Edit all the files you need to and run any checks or tests that you want.
-Remember, YOU CAN ONLY ENTER ONE COMMAND AT A TIME. You should always wait for feedback after every command.
-When you're satisfied with all of the changes you've made, you can use the "finish" tool to finish the interaction.
-Note however that you cannot use any interactive session commands (e.g. vim) in this environment, but you can write scripts and run them. E.g. you can write a python script and then run it with `python <script_name>.py`.
-
-NOTE ABOUT THE EDIT COMMAND: Indentation really matters! When editing a file, make sure to insert appropriate indentation before each line!
-
-IMPORTANT TIPS:
-1. Always start by trying to replicate the bug that the issues discusses.
-    If the issue includes code for reproducing the bug, we recommend that you re-implement that in your environment, and run it to make sure you can reproduce the bug.
-    Then start trying to fix it.
-    When you think you've fixed the bug, re-run the bug reproduction script to make sure that the bug has indeed been fixed.
-
-    If the bug reproduction script does not print anything when it successfully runs, we recommend adding a print("Script completed successfully, no errors.") command at the end of the file,
-    so that you can be sure that the script indeed ran fine all the way through.
-
-2. If you run a command and it doesn't work, try running a different command. A command that did not work once will not work the second time unless you modify it!
-
-3. If you open a file and need to get to an area around a specific line that is not in the first 100 lines, say line 583, don't just use the scroll_down command multiple times. Instead, use the goto 583 command. It's much quicker.
-
-4. If the bug reproduction script requires inputting/reading a specific file, such as buggy-input.png, and you'd like to understand how to input that file, conduct a search in the existing repo code, to see whether someone else has already done that. Do this by running the command: find_file("buggy-input.png") If that doesn't work, use the linux 'find' command.
-
-5. Always make sure to look at the currently open file and the current working directory (which appears right after the currently open file). The currently open file might be in a different directory than the working directory! Note that some commands, such as 'create', open files, so they might change the current  open file.
-
-6. When editing files, it is easy to accidentally specify a wrong line number or to write code with incorrect indentation. Always check the code after you issue an edit to make sure that it reflects what you wanted to accomplish. If it didn't, issue another command to fix it.
-
-[Current directory: /workspace/{workspace_dir_name}]
-"""
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -9,13 +9,13 @@ import toml
 from datasets import load_dataset

 import openhands.agenthub
-from evaluation.benchmarks.swe_bench.prompt import CODEACT_SWE_PROMPT
 from evaluation.utils.shared import (
    EvalException,
    EvalMetadata,
    EvalOutput,
    assert_and_raise,
    codeact_user_response,
+    is_fatal_evaluation_error,
    make_metadata,
    prepare_dataset,
    reset_logger_for_multiprocessing,
@@ -45,7 +45,6 @@ RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'tru

 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
    'CodeActAgent': codeact_user_response,
-    'CodeActSWEAgent': codeact_user_response,
 }


@@ -56,40 +55,28 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
 def get_instruction(instance: pd.Series, metadata: EvalMetadata):
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
    # Prepare instruction
-    if metadata.agent_class == 'CodeActSWEAgent':
-        instruction = (
-            'We are currently solving the following issue within our repository. Here is the issue text:\n'
-            '--- BEGIN ISSUE ---\n'
-            f'{instance.problem_statement}\n'
-            '--- END ISSUE ---\n\n'
-        )
-        if USE_HINT_TEXT and instance.hints_text:
-            instruction += (
-                f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
-            )
-        instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name)
-    else:
-        # Instruction based on Anthropic's official trajectory
-        # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
-        instruction = (
-            '<uploaded_files>\n'
-            f'/workspace/{workspace_dir_name}\n'
-            '</uploaded_files>\n'
-            f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
-            f'<pr_description>\n'
-            f'{instance.problem_statement}\n'
-            '</pr_description>\n\n'
-            'Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?\n'
-            "I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
-            'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the <pr_description> is satisfied.\n'
-            'Follow these steps to resolve the issue:\n'
-            '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
-            '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
-            '3. Edit the sourcecode of the repo to resolve the issue\n'
-            '4. Rerun your reproduce script and confirm that the error is fixed!\n'
-            '5. Think about edgecases and make sure your fix handles them as well\n'
-            "Your thinking should be thorough and so it's fine if it's very long.\n"
-        )
+
+    # Instruction based on Anthropic's official trajectory
+    # https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
+    instruction = (
+        '<uploaded_files>\n'
+        f'/workspace/{workspace_dir_name}\n'
+        '</uploaded_files>\n'
+        f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
+        f'<pr_description>\n'
+        f'{instance.problem_statement}\n'
+        '</pr_description>\n\n'
+        'Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?\n'
+        "I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
+        'Your task is to make the minimal changes to non-tests files in the /workspace directory to ensure the <pr_description> is satisfied.\n'
+        'Follow these steps to resolve the issue:\n'
+        '1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
+        '2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error\n'
+        '3. Edit the sourcecode of the repo to resolve the issue\n'
+        '4. Rerun your reproduce script and confirm that the error is fixed!\n'
+        '5. Think about edgecases and make sure your fix handles them as well\n'
+        "Your thinking should be thorough and so it's fine if it's very long.\n"
+    )

    if RUN_WITH_BROWSING:
        instruction += (
@@ -383,6 +370,7 @@ def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
+    runtime_failure_count: int = 0,
 ) -> EvalOutput:
    config = get_config(instance, metadata)

@@ -393,6 +381,15 @@ def process_instance(
    else:
        logger.info(f'Starting evaluation for instance {instance.instance_id}.')

+    # Increase resource_factor with each runtime failure (doubling per failure, capped at 2 below)
+    if runtime_failure_count > 0:
+        config.sandbox.remote_runtime_resource_factor = min(
+            config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
+            2,  # hardcode maximum resource factor to 2
+        )
+        logger.warning(
+            f'This is the second attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
+        )
    runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

@@ -414,11 +411,7 @@ def process_instance(
        )

        # if fatal error, throw EvalError to trigger re-run
-        if (
-            state.last_error
-            and 'fatal error during agent execution' in state.last_error
-            and 'stuck in a loop' not in state.last_error
-        ):
+        if is_fatal_evaluation_error(state.last_error):
            raise EvalException('Fatal error detected: ' + state.last_error)

        # ======= THIS IS SWE-Bench specific =======
@@ -504,6 +497,8 @@ if __name__ == '__main__':
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
        llm_config.log_completions = True
+        # modify_params must be False for evaluation purpose, for reproducibility and accuracy of results
+        llm_config.modify_params = False

    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
--- a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
@@ -6,6 +6,8 @@ import os
 from collections import Counter

 import pandas as pd
+import random
+import numpy as np

 from openhands.events.serialization import event_from_dict
 from openhands.events.utils import get_pairs_from_events
@@ -18,6 +20,18 @@ ERROR_KEYWORDS = [
 ]


+def get_bootstrap_accuracy_error_bars(values: list[float | int | bool], num_samples: int = 1000, p_value=0.05) -> tuple[float, float]:
+    """Return (lower, upper) percentile bounds of the mean over random half-size subsamples of `values`.
+    Draws `num_samples` subsamples without replacement; yields (nan, nan) when there are fewer than 2 values."""
+    half = len(values) // 2
+    if half == 0:  # nothing to subsample: same (nan, nan) outcome as before, minus numpy's empty-mean warning
+        return (float('nan'), float('nan'))
+    sorted_vals = np.sort([np.mean(random.sample(values, half)) for _ in range(num_samples)])
+    bottom_idx = int(num_samples * p_value / 2)
+    top_idx = min(int(num_samples * (1.0 - p_value / 2)), num_samples - 1)  # stay in range when p_value == 0
+    return (sorted_vals[bottom_idx], sorted_vals[top_idx])
+
+
 def process_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
@@ -26,6 +40,7 @@ def process_file(file_path):
    num_error_lines = 0
    num_agent_stuck_in_loop = 0
    num_resolved = 0
+    resolved_arr = []
    num_empty_patch = 0
    num_unfinished_runs = 0
    error_counter = Counter()
@@ -74,6 +89,9 @@ def process_file(file_path):
        resolved = report.get('resolved', False)
        if resolved:
            num_resolved += 1
+            resolved_arr.append(1)
+        else:
+            resolved_arr.append(0)

        # Error
        error = _d.get('error', None)
@@ -100,6 +118,7 @@ def process_file(file_path):
        'resolved': {
            'count': num_resolved,
            'percentage': (num_resolved / num_lines * 100) if num_lines > 0 else 0,
+            'ci': tuple(x * 100 for x in get_bootstrap_accuracy_error_bars(resolved_arr)),
        },
        'empty_patches': {
            'count': num_empty_patch,
@@ -128,6 +147,11 @@ def process_file(file_path):
                for error, count in error_counter.items()
            },
        },
+        'costs': {
+            'main_agent': sum(main_agent_cost),
+            'editor': sum(editor_cost),
+            'total': sum(main_agent_cost) + sum(editor_cost),
+        },
        'statistics': {
            'avg_turns': sum(num_turns) / num_lines if num_lines > 0 else 0,
            'costs': {
@@ -169,6 +193,7 @@ def aggregate_directory(input_path) -> pd.DataFrame:
    )

    df['resolve_rate'] = df['resolved'].apply(lambda x: x['percentage'])
+    df['resolve_rate_ci'] = df['resolved'].apply(lambda x: x['ci'])
    df['empty_patch_rate'] = df['empty_patches'].apply(lambda x: x['percentage'])
    df['unfinished_rate'] = df['unfinished_runs'].apply(lambda x: x['percentage'])
    df['avg_turns'] = df['statistics'].apply(lambda x: x['avg_turns'])
@@ -237,7 +262,7 @@ if __name__ == '__main__':
            # Print detailed results for single file
            print(f'\nResults for {args.input_path}:')
            print(
-                f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}%)"
+                f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}% [{result['resolved']['ci'][0]:.2f}%, {result['resolved']['ci'][1]:.2f}%])"
            )
            print(
                f"Number of empty patch: {result['empty_patches']['count']} / {result['total_instances']} ({result['empty_patches']['percentage']:.2f}%)"
@@ -251,6 +276,7 @@ if __name__ == '__main__':
            print(
                f"Number of unfinished runs: {result['unfinished_runs']['count']} / {result['total_instances']} ({result['unfinished_runs']['percentage']:.2f}%)"
            )
+            print(f"Total cost: {result['costs']['total']:.2f} USD")
            print('## Statistics')
            print(
                f"Avg. num of turns per instance: {result['statistics']['avg_turns']:.2f}"
--- a/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py
@@ -0,0 +1,104 @@
+import argparse
+
+import pandas as pd
+
+from openhands.core.logger import openhands_logger as logger
+
+
+def verify_instance_costs(row: pd.Series) -> float:
+    """
+    Verifies that the accumulated_cost matches the sum of individual costs in metrics.
+    Also checks for duplicate consecutive costs which might indicate buggy counting.
+    If the consecutive costs are identical, the file is affected by this bug:
+    https://github.com/All-Hands-AI/OpenHands/issues/5383
+
+    Args:
+        row: DataFrame row containing instance data with metrics
+    Returns:
+        float: The verified total cost for this instance (corrected if needed); 0.0 if metrics are missing or verification fails
+    """
+    try:
+        metrics = row.get('metrics')
+        if not metrics:
+            logger.warning(f"Instance {row['instance_id']}: No metrics found")
+            return 0.0
+
+        accumulated = metrics.get('accumulated_cost')
+        costs = metrics.get('costs', [])
+
+        if accumulated is None:
+            logger.warning(
+                f"Instance {row['instance_id']}: No accumulated_cost in metrics"
+            )
+            return 0.0
+
+        # Check for duplicate consecutive costs and systematic even-odd pairs
+        has_duplicate = False
+        all_pairs_match = True
+
+        # Check each even-odd pair (0-1, 2-3, etc.)
+        for i in range(0, len(costs) - 1, 2):
+            if abs(costs[i]['cost'] - costs[i + 1]['cost']) < 1e-6:
+                has_duplicate = True
+                logger.debug(
+                    f"Instance {row['instance_id']}: Possible buggy double-counting detected! "
+                    f"Steps {i} and {i+1} have identical costs: {costs[i]['cost']:.2f}"
+                )
+            else:
+                all_pairs_match = False
+                break
+
+        # Calculate total cost, accounting for buggy double counting if detected
+        if len(costs) >= 2 and has_duplicate and all_pairs_match:
+            paired_steps_cost = sum(
+                cost_entry['cost']
+                for cost_entry in costs[: -1 if len(costs) % 2 else None]
+            )
+            real_paired_cost = paired_steps_cost / 2
+
+            unpaired_cost = costs[-1]['cost'] if len(costs) % 2 else 0
+            total_cost = real_paired_cost + unpaired_cost
+
+        else:
+            total_cost = sum(cost_entry['cost'] for cost_entry in costs)
+
+        if not abs(total_cost - accumulated) < 1e-6:
+            logger.warning(
+                f"Instance {row['instance_id']}: Cost mismatch: "
+                f"accumulated: {accumulated:.2f}, sum of costs: {total_cost:.2f}, "
+            )
+
+        return total_cost
+
+    except Exception as e:
+        logger.error(
+            f"Error verifying costs for instance {row.get('instance_id', 'UNKNOWN')}: {e}"
+        )
+        return 0.0
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Verify costs in SWE-bench output file'
+    )
+    parser.add_argument(
+        'input_filepath', type=str, help='Path to the output.jsonl file'
+    )
+    args = parser.parse_args()
+
+    try:
+        # Load and verify the JSONL file
+        df = pd.read_json(args.input_filepath, lines=True)
+        logger.info(f'Loaded {len(df)} instances from {args.input_filepath}')
+
+        # Verify costs for each instance and sum up total
+        total_cost = df.apply(verify_instance_costs, axis=1).sum()
+        logger.info(f'Total verified cost across all instances: ${total_cost:.2f}')
+
+    except Exception as e:
+        logger.error(f'Failed to process file: {e}')
+        raise
+
+
+if __name__ == '__main__':
+    main()
--- a/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/setup/instance_swe_entry.sh
@@ -33,7 +33,7 @@ if [ -d /workspace/$WORKSPACE_NAME ]; then
    rm -rf /workspace/$WORKSPACE_NAME
 fi
 mkdir -p /workspace
-ln -s /testbed /workspace/$WORKSPACE_NAME
+cp -r /testbed /workspace/$WORKSPACE_NAME

 # Activate instance-specific environment
 . /opt/miniconda3/etc/profile.d/conda.sh
--- a/evaluation/benchmarks/the_agent_company/README.md
+++ b/evaluation/benchmarks/the_agent_company/README.md
@@ -0,0 +1,43 @@
+# The Agent Company Evaluation with OpenHands
+
+This folder contains the evaluation harness that we built on top of the original [The Agent Company](https://github.com/TheAgentCompany/TheAgentCompany/tree/main/evaluation) ([paper](https://arxiv.org/abs/2412.14161)).
+
+The evaluation consists of two steps:
+
+1. Environment setup: [install python environment](../../README.md#development-environment), [configure LLM config](../../README.md#configure-openhands-and-your-llm), [launch services](https://github.com/TheAgentCompany/TheAgentCompany/blob/main/docs/SETUP.md).
+2. [Run Evaluation](#run-inference-on-the-agent-company-tasks): Run all tasks and get the evaluation results.
+
+## Setup Environment and LLM Configuration
+
+Please follow the instructions [here](../../README.md#setup) to set up your local development environment and LLM.
+
+## Run Inference on The Agent Company Tasks
+
+When the `run_infer.sh` script is started, it will automatically pull all task images. Every task image will be used to create an OpenHands runtime image in which the agent will operate.
+
+```bash
+./evaluation/benchmarks/the_agent_company/scripts/run_infer.sh \
+  --agent-llm-config <agent-llm-config>  \
+  --env-llm-config <env-llm-config> \
+  --outputs-path <outputs-path> \
+  --server-hostname <server-hostname> \
+  --version <version>
+
+# Example
+./evaluation/benchmarks/the_agent_company/scripts/run_infer.sh \
+  --agent-llm-config claude-3-5-sonnet-20240620 \
+  --env-llm-config claude-3-5-sonnet-20240620 \
+  --outputs-path outputs \
+  --server-hostname localhost \
+  --version 1.0.0
+```
+
+- `agent-llm-config`: the config name for the agent LLM. This should match the config name in config.toml. This is the LLM used by the agent (e.g. CodeActAgent).
+- `env-llm-config`: the config name for the environment LLM. This should match the config name in config.toml. This is used by the chat bots (NPCs) and LLM-based evaluators.
+- `outputs-path`: the path to save trajectories and evaluation results.
+- `server-hostname`: the hostname of the server that hosts all the web services. It could be localhost if you are running the evaluation and services on the same machine. If the services are hosted on a remote machine, you must use the hostname of the remote machine rather than its IP address.
+- `version`: the version of the task images to use. Currently, the only supported version is 1.0.0.
+
+The script is idempotent. If you run it again, it will resume from the last checkpoint. It usually takes a few days to finish the evaluation.
+
+Note: the script will automatically skip a task if it encounters an error. This usually happens when the OpenHands runtime dies due to some unexpected errors. This means even if the script finishes, it might not have evaluated all tasks. You can manually resume the evaluation by running the script again.
--- a/evaluation/benchmarks/the_agent_company/browsing.py
+++ b/evaluation/benchmarks/the_agent_company/browsing.py
@@ -0,0 +1,273 @@
+##################################################################################################
+# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/browsing.py
+##################################################################################################
+
+import base64
+import os
+import re
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import Dict, List, Optional, Union
+
+from openhands.core.logger import openhands_logger as logger
+from openhands.events.action import BrowseInteractiveAction
+from openhands.events.observation import BrowserOutputObservation
+from openhands.runtime.base import Runtime
+
+
+class ActionType(Enum):
+    GOTO = auto()
+    FILL = auto()
+    CLICK = auto()
+    NOOP = auto()
+
+
+@dataclass
+class Selector:
+    """
+    Represents either a direct anchor ID or a descriptive selector
+    """
+
+    value: str
+    is_anchor: bool = False
+
+    def __str__(self) -> str:
+        return f'{self.value}'
+
+
+@dataclass
+class BrowserAction:
+    """Base class for all browser actions"""
+
+    action_type: ActionType
+
+    def to_instruction(self) -> str:
+        """Convert the action to a browser instruction string"""
+        raise NotImplementedError
+
+
+@dataclass
+class GotoAction(BrowserAction):
+    url: str
+
+    def __init__(self, url: str):
+        super().__init__(ActionType.GOTO)
+        self.url = url
+
+    def to_instruction(self) -> str:
+        return f'goto("{self.url}")'
+
+
+@dataclass
+class NoopAction(BrowserAction):
+    milliseconds: int
+
+    def __init__(self, milliseconds: int):
+        super().__init__(ActionType.NOOP)
+        self.milliseconds = milliseconds
+
+    def to_instruction(self) -> str:
+        return f'noop({self.milliseconds})'
+
+
+@dataclass
+class InputAction(BrowserAction):
+    selector: Selector
+    value: str
+
+    def __init__(self, selector: Union[str, Selector], value: str):
+        super().__init__(ActionType.FILL)
+        self.selector = (
+            selector if isinstance(selector, Selector) else Selector(selector)
+        )
+        self.value = value
+
+    def to_instruction(self) -> str:
+        return f'fill("{self.selector}", "{self.value}")'
+
+
+@dataclass
+class ClickAction(BrowserAction):
+    selector: Selector
+
+    def __init__(self, selector: Union[str, Selector]):
+        super().__init__(ActionType.CLICK)
+        self.selector = (
+            selector if isinstance(selector, Selector) else Selector(selector)
+        )
+
+    def to_instruction(self) -> str:
+        return f'click("{self.selector}")'
+
+
+def parse_content_to_elements(content: str) -> Dict[str, str]:
+    """Parse the observation content into a dictionary mapping anchors to their descriptions"""
+    elements = {}
+    current_anchor = None
+    description_lines = []
+
+    for line in content.split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+
+        # Check for anchor line
+        anchor_match = re.match(r'\[(\d+)\](.*)', line)
+        if anchor_match:
+            # Save previous element if it exists
+            if current_anchor and description_lines:
+                elements[current_anchor] = ' '.join(description_lines)
+
+            # Start new element
+            current_anchor = anchor_match.group(1)
+            description_lines = [anchor_match.group(2).strip()]
+        else:
+            # Add to current description if we have an anchor
+            if current_anchor:
+                description_lines.append(line)
+
+    # Save last element
+    if current_anchor and description_lines:
+        elements[current_anchor] = ' '.join(description_lines)
+
+    return elements
+
+
+def find_matching_anchor(content: str, selector: str) -> Optional[str]:
+    """Find the anchor ID that matches the given selector description"""
+    elements = parse_content_to_elements(content)
+
+    # Clean up selector and create a pattern
+    selector = selector.lower().strip()
+
+    for anchor, description in elements.items():
+        description = description.lower().strip()
+        if selector in description:
+            return anchor
+
+    return None
+
+
+def resolve_action(action: BrowserAction, content: str) -> Optional[BrowserAction]:
+    """
+    Resolve any descriptive selectors in the action to anchor IDs based on the content.
+    Returns the resolved action (or `action` itself if nothing needs resolving); None if no anchor matches.
+    """
+    if isinstance(action, (InputAction, ClickAction)):
+        if not action.selector.is_anchor:
+            anchor = find_matching_anchor(content, action.selector.value)
+            if anchor:
+                new_selector = Selector(anchor, is_anchor=True)
+                if isinstance(action, InputAction):
+                    return InputAction(new_selector, action.value)
+                else:
+                    return ClickAction(new_selector)
+            else:
+                logger.error(f'NO MATCH FOUND FOR SELECTOR, {action.selector}')
+                return None
+    return action
+
+
+def pre_login(
+    runtime: Runtime,
+    services: List[str],
+    save_screenshots=True,
+    screenshots_dir='screenshots',
+):
+    """
+    Logs in to all the websites that are needed for the evaluation.
+    Once logged in, the sessions would be cached in the browser, so OpenHands
+    agent doesn't need to log in to these websites again.
+
+    Only sites named in `services` are visited; with `save_screenshots`, one PNG per
+    step is written to `screenshots_dir/<site>/`. Raises Exception if a selector cannot be resolved.
+    """
+    owncloud_login_actions = [
+        GotoAction('http://the-agent-company.com:8092'),
+        NoopAction(1000),
+        InputAction("textbox '', clickable, focused, required", 'theagentcompany'),
+        NoopAction(1000),
+        InputAction("textbox '', clickable, required", 'theagentcompany'),
+        NoopAction(1000),
+        ClickAction("button '', clickable"),
+        NoopAction(1000),
+    ]
+
+    rocketchat_login_actions = [
+        GotoAction('http://the-agent-company.com:3000'),
+        NoopAction(1000),
+        InputAction("textbox '', clickable, focused", 'theagentcompany'),
+        NoopAction(1000),
+        InputAction("textbox '', clickable", 'theagentcompany'),
+        NoopAction(1000),
+        ClickAction("button 'Login', clickable"),
+    ]
+
+    gitlab_login_actions = [
+        GotoAction('http://the-agent-company.com:8929/users/sign_in'),
+        NoopAction(1000),
+        InputAction("textbox 'Username or primary email'", 'root'),
+        NoopAction(1000),
+        InputAction("textbox 'Password'", 'theagentcompany'),
+        NoopAction(1000),
+        ClickAction("button 'Sign in', clickable"),
+    ]
+
+    # devnote: plane reset is not stable, and sometimes it fails to launch
+    # in which case the login action will fail, and then we would skip the task
+    plane_login_actions = [
+        GotoAction('http://the-agent-company.com:8091'),
+        NoopAction(1000),
+        InputAction("textbox 'Email', clickable, focused", 'agent@company.com'),
+        NoopAction(1000),
+        ClickAction("button 'Continue'"),
+        NoopAction(1000),
+        InputAction("textbox 'Enter password', clickable", 'theagentcompany'),
+        NoopAction(1000),
+        ClickAction("button 'Go to workspace'"),
+    ]
+
+    all_login_actions = [
+        ('owncloud', owncloud_login_actions),
+        ('rocketchat', rocketchat_login_actions),
+        ('gitlab', gitlab_login_actions),
+        ('plane', plane_login_actions),
+    ]
+
+    for website_name, login_actions in all_login_actions:
+        if website_name not in services:
+            logger.info(
+                f"Skipping login for {website_name} because it's not in the list of services to reset"
+            )
+            continue
+
+        if save_screenshots:
+            directory = os.path.join(screenshots_dir, website_name)
+            # exist_ok avoids the check-then-create race of a separate os.path.exists() test
+            os.makedirs(directory, exist_ok=True)
+            image_id = 0
+        obs: Optional[BrowserOutputObservation] = None
+        for login_action in login_actions:
+            # Resolve any descriptive selectors to anchor IDs; the first action of a
+            # site has no prior observation to resolve against, so it is used as-is.
+            action = login_action
+            if obs:
+                action = resolve_action(login_action, obs.get_agent_obs_text())
+            if not action:
+                # Log the unresolved action itself: `action` is None at this point.
+                logger.error(f'FAILED TO RESOLVE ACTION, {login_action}')
+                raise Exception(
+                    'FAILED TO RESOLVE ACTION, maybe the service is not available'
+                )
+            # Convert the action to an instruction string
+            instruction = action.to_instruction()
+            browser_action = BrowseInteractiveAction(browser_actions=instruction)
+            browser_action.timeout = 10000
+            logger.info(browser_action, extra={'msg_type': 'ACTION'})
+            obs = runtime.run_action(browser_action)
+            logger.debug(obs, extra={'msg_type': 'OBSERVATION'})
+            if save_screenshots:
+                image_data = base64.b64decode(obs.screenshot)
+                with open(os.path.join(directory, f'{image_id}.png'), 'wb') as file:
+                    file.write(image_data)
+                    image_id += 1
--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -0,0 +1,319 @@
+##################################################################################################
+# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.py
+##################################################################################################
+
+import asyncio
+import base64
+import json
+import os
+import shutil
+import tempfile
+from typing import List
+
+import yaml
+from browsing import pre_login
+
+from openhands.controller.state.state import State
+from openhands.core.config import (
+    AppConfig,
+    LLMConfig,
+    SandboxConfig,
+    get_llm_config_arg,
+    get_parser,
+)
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.main import create_runtime, run_controller
+from openhands.events.action import CmdRunAction, MessageAction
+from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
+from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
+
+
+def get_config(
+    base_container_image: str,
+    task_short_name: str,
+    mount_path_on_host: str,
+    llm_config: LLMConfig,
+) -> AppConfig:
+    """Assemble the AppConfig used to run one task.
+
+    Args:
+        base_container_image: image the sandbox is built from.
+        task_short_name: used to name the trajectory file (`traj_<name>.json`).
+        mount_path_on_host: host directory mounted into the sandbox at `/outputs`;
+            the trajectory file is written inside it.
+        llm_config: LLM configuration installed on the returned config.
+
+    Returns:
+        The populated AppConfig.
+    """
+    sandbox_config = SandboxConfig(
+        base_container_image=base_container_image,
+        enable_auto_lint=True,
+        # host networking lets the container reach the host machine
+        use_host_network=True,
+        # generous timeout, because some test cases take very long to run
+        timeout=300,
+        api_key=os.environ.get('ALLHANDS_API_KEY'),
+    )
+    trajectory_file = os.path.join(mount_path_on_host, f'traj_{task_short_name}.json')
+
+    app_config = AppConfig(
+        run_as_openhands=False,
+        max_budget_per_task=4,
+        max_iterations=100,
+        trajectories_path=trajectory_file,
+        sandbox=sandbox_config,
+        # the trajectory directory is mounted so that trajectories generated by the
+        # OpenHands controller are reachable by the evaluator in the runtime container
+        workspace_mount_path=mount_path_on_host,
+        workspace_mount_path_in_sandbox='/outputs',
+    )
+    app_config.set_llm_config(llm_config)
+    return app_config
+
+
+def load_dependencies(runtime: Runtime) -> List[str]:
+    """
+    Read the task's service dependencies from inside the runtime.
+
+    Runs `cat /utils/dependencies.yml` in the runtime, asserts that the command
+    exited with status 0, and parses its output as YAML. A file that parses to
+    None (e.g. an empty file) is reported as an empty list.
+    """
+    cat_action = CmdRunAction(command='cat /utils/dependencies.yml')
+    logger.info(cat_action, extra={'msg_type': 'ACTION'})
+    cat_obs: CmdOutputObservation = runtime.run_action(cat_action)
+    logger.info(cat_obs, extra={'msg_type': 'OBSERVATION'})
+    assert cat_obs.exit_code == 0
+    parsed = yaml.safe_load(cat_obs.content)
+    return [] if parsed is None else parsed
+
+
+def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
+    """Run the task image's `/utils/init.sh` inside the runtime.
+
+    The server hostname and the environment LLM settings (API key, base URL,
+    model) are passed to the script as environment variables on the command
+    line. Asserts that the script exits with status 0.
+
+    Args:
+        runtime: runtime in which the command is executed.
+        hostname: value exported as SERVER_HOSTNAME.
+        env_llm_config: source of the LITELLM_* values.
+    """
+    import shlex
+
+    def quoted(value) -> str:
+        # Shell-quote each value so characters such as `&`, `?`, spaces or quotes
+        # (common in base URLs) cannot split or alter the command. Values made
+        # only of shell-safe characters are returned unchanged.
+        return shlex.quote(str(value))
+
+    command = (
+        f'SERVER_HOSTNAME={quoted(hostname)} '
+        f'LITELLM_API_KEY={quoted(env_llm_config.api_key)} '
+        f'LITELLM_BASE_URL={quoted(env_llm_config.base_url)} '
+        f'LITELLM_MODEL={quoted(env_llm_config.model)} '
+        'bash /utils/init.sh'
+    )
+    action = CmdRunAction(command=command)
+    # initialisation is allowed considerably longer than ordinary commands
+    action.timeout = 900
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+
+def codeact_user_response(state: State) -> str:
+    """Produce the canned "user" reply that keeps the agent working unattended.
+
+    The reply always tells the agent to continue and never to ask for human
+    help. Once the history already holds at least two user messages, a hint
+    on how to give up is appended.
+    """
+    base_reply = (
+        'Please continue working on the task on whatever approach you think is suitable.\n'
+        'If you think you have solved the task, please finish the interaction.\n'
+        'IMPORTANT: YOU SHOULD NEVER ASK FOR HUMAN HELP.\n'
+    )
+
+    if not state.history:
+        return base_reply
+
+    # count the messages in the history that came from the (fake) user
+    user_message_count = sum(
+        1
+        for event in state.history
+        if isinstance(event, MessageAction) and event.source == 'user'
+    )
+    if user_message_count < 2:
+        return base_reply
+
+    # two or more user messages so far: tell the agent it may give up
+    return (
+        base_reply
+        + 'If you want to give up, run: <execute_bash> exit </execute_bash>.\n'
+    )
+
+
+def run_solver(
+    runtime: Runtime,
+    task_name: str,
+    config: AppConfig,
+    dependencies: List[str],
+    save_final_state: bool,
+    state_dir: str,
+    save_screenshots: bool,
+    screenshots_dir: str,
+) -> State:
+    """Run the agent on the task and optionally persist screenshots and state.
+
+    Args:
+        runtime: runtime handed to the controller.
+        task_name: used as the session id and in output file/directory names.
+        config: configuration handed to the controller.
+        dependencies: service names the task depends on; when 'gitlab' is
+            present the GitLab credentials are appended to the instruction.
+        save_final_state: when true, write `state_<task_name>.json` to state_dir.
+        state_dir: directory for the final-state file (created if missing).
+        save_screenshots: when true, write one PNG per browser observation.
+        screenshots_dir: parent directory; screenshots go to a `task_name`
+            sub-directory (created if missing).
+
+    Returns:
+        Whatever run_controller returned (annotated as possibly None).
+    """
+    instruction = 'Complete the task in /instruction/task.md'
+
+    if 'gitlab' in dependencies:
+        instruction += "\n\nGitlab username is 'root' and password is 'theagentcompany'"
+
+    state: State | None = asyncio.run(
+        run_controller(
+            config=config,
+            sid=task_name,
+            initial_user_action=MessageAction(content=instruction),
+            runtime=runtime,
+            fake_user_response_fn=codeact_user_response,
+        )
+    )
+    logger.info(state)
+
+    if save_screenshots:
+        screenshots_dir = os.path.join(screenshots_dir, task_name)
+        os.makedirs(screenshots_dir, exist_ok=True)
+        # the controller result may be None; in that case there is no history
+        # to export, so skip instead of failing on `None.history`
+        history = state.history if state is not None else []
+        # image_id is the event's index in the history, so file numbers may have gaps
+        for image_id, obs in enumerate(history):
+            if isinstance(obs, BrowserOutputObservation):
+                image_data = base64.b64decode(obs.screenshot)
+                with open(
+                    os.path.join(screenshots_dir, f'{image_id}.png'), 'wb'
+                ) as file:
+                    file.write(image_data)
+
+    if save_final_state:
+        os.makedirs(state_dir, exist_ok=True)
+        with open(os.path.join(state_dir, f'state_{task_name}.json'), 'w') as file:
+            # the state is stored as its string representation, wrapped as a JSON string
+            json.dump(str(state), file)
+
+    return state
+
+
+def run_evaluator(
+    runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str
+):
+    """Run the task's `/utils/eval.py` inside the runtime.
+
+    The environment LLM settings and the decryption key are passed as
+    environment variables on the command line. Asserts that the evaluator
+    exits with status 0.
+
+    Args:
+        runtime: runtime in which the command is executed.
+        env_llm_config: source of the LITELLM_* values.
+        trajectory_path: value of `--trajectory_path` (a path inside the runtime).
+        result_path: value of `--result_path` (a path inside the runtime).
+    """
+    import shlex
+
+    def quoted(value) -> str:
+        # Shell-quote each value so special characters cannot split or alter
+        # the command. Values made only of shell-safe characters are unchanged.
+        return shlex.quote(str(value))
+
+    command = (
+        f'LITELLM_API_KEY={quoted(env_llm_config.api_key)} '
+        f'LITELLM_BASE_URL={quoted(env_llm_config.base_url)} '
+        f'LITELLM_MODEL={quoted(env_llm_config.model)} '
+        "DECRYPTION_KEY='theagentcompany is all you need' "  # Hardcoded Key
+        f'python_default /utils/eval.py --trajectory_path {quoted(trajectory_path)} --result_path {quoted(result_path)}'
+    )
+    action = CmdRunAction(command=command)
+    action.timeout = 600
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+
+# Script entry point: run one task image end to end
+# (init -> pre-login -> solve -> evaluate -> collect outputs).
+if __name__ == '__main__':
+    parser = get_parser()
+    parser.add_argument(
+        '--task-image-name',
+        type=str,
+        default='ghcr.io/theagentcompany/example-image:1.0.0',
+        help='Task image name',
+    )
+    parser.add_argument(
+        '--outputs-path',
+        type=str,
+        default='./outputs',
+        help='Folder path to save trajectories and evaluation results',
+    )
+    parser.add_argument(
+        '--server-hostname',
+        type=str,
+        default='localhost',
+        help='Server hostname, e.g. localhost to access the host machine from the container, '
+        'assuming the task docker container is run with `--network host` flag',
+    )
+    parser.add_argument(
+        '--agent-llm-config',
+        type=str,
+        default=None,
+        help='LLM config for agent',
+    )
+    parser.add_argument(
+        '--env-llm-config',
+        type=str,
+        default=None,
+        help='LLM config for evaluation environment (NPC & llm-based evaluator)',
+    )
+    # parse_known_args: unrecognised command-line arguments are ignored, not rejected
+    args, _ = parser.parse_known_args()
+
+    # Both LLM configs are mandatory: a missing option, an unresolved config, or
+    # a config without an API key aborts the run with ValueError.
+    agent_llm_config: LLMConfig | None = None
+    if args.agent_llm_config:
+        agent_llm_config = get_llm_config_arg(args.agent_llm_config)
+
+    if agent_llm_config is None:
+        raise ValueError(
+            f'Could not find LLM config for agent: --agent-llm-config {args.agent_llm_config}'
+        )
+
+    if agent_llm_config.api_key is None:
+        raise ValueError('LLM API key is not set for agent')
+
+    env_llm_config: LLMConfig | None = None
+    if args.env_llm_config:
+        env_llm_config = get_llm_config_arg(args.env_llm_config)
+
+    if env_llm_config is None:
+        raise ValueError(
+            f'Could not find LLM config for evaluation environment: --env-llm-config {args.env_llm_config}'
+        )
+
+    if env_llm_config.api_key is None:
+        raise ValueError('LLM API key is not set for evaluation environment')
+
+    # e.g. 'ghcr.io/theagentcompany/example-image:1.0.0' -> 'example-image'
+    task_short_name = args.task_image_name.split('/')[-1].split(':')[0]
+    logger.info(
+        f'Task image name is {args.task_image_name}, short name is {task_short_name}'
+    )
+
+    # mount a temporary directory to pass trajectory from host to container, and to
+    # pass the evaluation result from container to host
+    # 1) trajectory is dumped by OpenHands library (on host machine), but it's needed by
+    # evaluator (in container), so we mount a temporary directory to pass it in
+    # 2) evaluation result is written by evaluator (in container), but we need to persist
+    # it on host machine, so we mount a temporary directory to pass it out
+    # An existing $TMPDIR is used directly as the mount directory; otherwise a
+    # fresh temporary directory is created.
+    if os.getenv('TMPDIR') and os.path.exists(os.getenv('TMPDIR')):
+        temp_dir = os.path.abspath(os.getenv('TMPDIR'))
+    else:
+        temp_dir = tempfile.mkdtemp()
+    config: AppConfig = get_config(
+        args.task_image_name, task_short_name, temp_dir, agent_llm_config
+    )
+    runtime: Runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+
+    init_task_env(runtime, args.server_hostname, env_llm_config)
+
+    dependencies = load_dependencies(runtime)
+    logger.info(f'Service dependencies: {dependencies}')
+
+    # Log in to the dependent services; on any failure, re-initialise the task
+    # environment and try exactly once more (a second failure propagates).
+    try:
+        pre_login(
+            runtime,
+            dependencies,
+            save_screenshots=True,
+            screenshots_dir=os.path.join(
+                os.path.abspath(args.outputs_path), 'screenshots'
+            ),
+        )
+    except Exception as e:
+        logger.error(f'Failed to pre-login: {e}')
+
+        # before giving up, let's try to init and login again
+        init_task_env(runtime, args.server_hostname, env_llm_config)
+        pre_login(
+            runtime,
+            dependencies,
+            save_screenshots=True,
+            screenshots_dir=os.path.join(
+                os.path.abspath(args.outputs_path), 'screenshots'
+            ),
+        )
+
+    state = run_solver(
+        runtime,
+        task_short_name,
+        config,
+        dependencies,
+        save_final_state=True,
+        state_dir=os.path.abspath(args.outputs_path),
+        save_screenshots=True,
+        screenshots_dir=os.path.join(os.path.abspath(args.outputs_path), 'screenshots'),
+    )
+
+    # this path is the absolute path in the runtime container
+    trajectory_path = f'/outputs/traj_{task_short_name}.json'
+    result_path = f'/outputs/eval_{task_short_name}.json'
+
+    run_evaluator(runtime, env_llm_config, trajectory_path, result_path)
+
+    # finally, move trajectory file and evaluation result from mount path on host (temp dir) to outputs path
+    shutil.move(
+        os.path.join(temp_dir, f'traj_{task_short_name}.json'),
+        os.path.join(
+            os.path.abspath(args.outputs_path), f'traj_{task_short_name}.json'
+        ),
+    )
+    shutil.move(
+        os.path.join(temp_dir, f'eval_{task_short_name}.json'),
+        os.path.join(
+            os.path.abspath(args.outputs_path), f'eval_{task_short_name}.json'
+        ),
+    )
--- a/evaluation/benchmarks/the_agent_company/scripts/run_infer.sh
+++ b/evaluation/benchmarks/the_agent_company/scripts/run_infer.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+##################################################################################################
+# Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
+##################################################################################################
+
+# Runs every task image listed in the release's tasks.md through run_infer.py,
+# skipping tasks whose evaluation file already exists in OUTPUTS_PATH.
+
+# Exit on the first failing command only when DEBUG is set (useful for debugging);
+# otherwise a failing task does not stop the remaining tasks.
+if [ -n "$DEBUG" ]; then
+    set -e
+fi
+
+# AGENT_LLM_CONFIG is the config name for the agent LLM
+# In config.toml, you should have a section with the name
+# [llm.<AGENT_LLM_CONFIG>], e.g. [llm.agent]
+AGENT_LLM_CONFIG="agent"
+
+# ENV_LLM_CONFIG is the config name for the environment LLM,
+# used by the NPCs and LLM-based evaluators.
+# In config.toml, you should have a section with the name
+# [llm.<ENV_LLM_CONFIG>], e.g. [llm.env]
+ENV_LLM_CONFIG="env"
+
+# OUTPUTS_PATH is the path to save trajectories and evaluation results
+OUTPUTS_PATH="outputs"
+
+# SERVER_HOSTNAME is the hostname of the server that hosts all the web services,
+# including RocketChat, ownCloud, GitLab, and Plane.
+SERVER_HOSTNAME="localhost"
+
+# VERSION is the version of the task images to use
+# If a task doesn't have a published image with this version, it will be skipped
+# 12/15/2024: this is for forward compatibility, in the case where we add new tasks
+# after the 1.0.0 release
+VERSION="1.0.0"
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --agent-llm-config)
+            AGENT_LLM_CONFIG="$2"
+            shift 2
+            ;;
+        --env-llm-config)
+            ENV_LLM_CONFIG="$2"
+            shift 2
+            ;;
+        --outputs-path)
+            OUTPUTS_PATH="$2"
+            shift 2
+            ;;
+        --server-hostname)
+            SERVER_HOSTNAME="$2"
+            shift 2
+            ;;
+        --version)
+            VERSION="$2"
+            shift 2
+            ;;
+        *)
+            echo "Unknown argument: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Convert outputs_path to absolute path
+if [[ ! "$OUTPUTS_PATH" = /* ]]; then
+    # If path is not already absolute (doesn't start with /), make it absolute
+    OUTPUTS_PATH="$(cd "$(dirname "$OUTPUTS_PATH")" 2>/dev/null && pwd)/$(basename "$OUTPUTS_PATH")"
+fi
+
+echo "Using agent LLM config: $AGENT_LLM_CONFIG"
+echo "Using environment LLM config: $ENV_LLM_CONFIG"
+echo "Outputs path: $OUTPUTS_PATH"
+echo "Server hostname: $SERVER_HOSTNAME"
+echo "Version: $VERSION"
+
+echo "Downloading tasks.md..."
+rm -f tasks.md
+wget "https://github.com/TheAgentCompany/TheAgentCompany/releases/download/${VERSION}/tasks.md"
+
+# tasks.md holds one task image reference per line
+while IFS= read -r task_image; do
+    docker pull "$task_image"
+
+    # Remove prefix using ## to remove longest matching pattern from start
+    task_name=${task_image##ghcr.io/theagentcompany/}
+
+    # Remove suffix using % to remove shortest matching pattern from end
+    task_name=${task_name%-image:*}
+    echo "Use task image $task_image, task name $task_name..."
+
+    # Check if evaluation file exists
+    if [ -f "$OUTPUTS_PATH/eval_${task_name}-image.json" ]; then
+        echo "Skipping $task_name - evaluation file already exists"
+        continue
+    fi
+
+    # Prepend the benchmark directory to PYTHONPATH for this command only, keeping
+    # any inherited value (the `:+` expansion avoids a trailing colon when unset).
+    PYTHONPATH="evaluation/benchmarks/the_agent_company${PYTHONPATH:+:$PYTHONPATH}" \
+        poetry run python run_infer.py \
+            --agent-llm-config "$AGENT_LLM_CONFIG" \
+            --env-llm-config "$ENV_LLM_CONFIG" \
+            --outputs-path "$OUTPUTS_PATH" \
+            --server-hostname "$SERVER_HOSTNAME" \
+            --task-image-name "$task_image"
+
+    # Prune unused images and volumes
+    docker image rm "$task_image"
+    docker images "ghcr.io/all-hands-ai/runtime" -q | xargs -r docker rmi -f
+    docker volume prune -f
+    docker system prune -f
+done < tasks.md
+
+rm tasks.md
+
+echo "All evaluation completed successfully!"
--- a/evaluation/benchmarks/toolqa/README.md
+++ b/evaluation/benchmarks/toolqa/README.md
@@ -11,7 +11,7 @@ Please follow instruction [here](../../README.md#setup) to setup your local deve
 Make sure your Docker daemon is running, then run this bash script:

 ```bash
-bash evaluation/benchmarks/toolqa/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [dataset] [hardness] [wolfram_alpha_appid]
+bash evaluation/benchmarks/toolqa/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [dataset] [hardness] [wolfram-alpha-appid]
 ```

 where `model_config` is mandatory, while all other arguments are optional.
@@ -32,7 +32,7 @@ By default, the script evaluates 1 instance.

 `hardness`, the hardness to evaluate. You could choose from `easy` and `hard`. The default is `easy`.

-`wolfram_alpha_appid` is an optional argument. When given `wolfram_alpha_appid`, the agent will be able to access Wolfram Alpha's APIs.
+`wolfram-alpha-appid` is an optional argument. When given `wolfram-alpha-appid`, the agent will be able to access Wolfram Alpha's APIs.

 Note: in order to use `eval_limit`, you must also set `agent`; in order to use `dataset`, you must also set `eval_limit`; in order to use `hardness`, you must also set `dataset`.

--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -171,7 +171,7 @@ if __name__ == '__main__':
        default='easy',
    )
    parser.add_argument(
-        '--wolfram_alpha_appid',
+        '--wolfram-alpha-appid',
        type=str,
        help='wolfram alpha appid to use for wolfram alpha related tests',
        default='YOUR_WOLFRAMALPHA_APPID',
@@ -181,6 +181,9 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
+        llm_config.modify_params = False
+
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/benchmarks/toolqa/scripts/run_infer.sh
+++ b/evaluation/benchmarks/toolqa/scripts/run_infer.sh
@@ -53,7 +53,7 @@ COMMAND="poetry run python evaluation/benchmarks/toolqa/run_infer.py \
  --max-iterations 30 \
  --dataset $DATASET \
  --hardness $HARDNESS \
-  --wolfram_alpha_appid $WOLFRAM_APPID\
+  --wolfram-alpha-appid $WOLFRAM_APPID\
  --data-split validation \
  --eval-num-workers $NUM_WORKERS \
  --eval-note ${OPENHANDS_VERSION}_${LEVELS}"
--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@@ -212,6 +212,8 @@ if __name__ == '__main__':
    llm_config = None
    if args.llm_config:
        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purposes, for reproducibility and accuracy of results
+        llm_config.modify_params = False
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -8,6 +8,7 @@ import subprocess
 import time
 import traceback
 from contextlib import contextmanager
+from inspect import signature
 from typing import Any, Awaitable, Callable, TextIO

 import pandas as pd
@@ -16,6 +17,15 @@ from tqdm import tqdm

 from openhands.controller.state.state import State
 from openhands.core.config import LLMConfig
+from openhands.core.exceptions import (
+    AgentRuntimeBuildError,
+    AgentRuntimeDisconnectedError,
+    AgentRuntimeError,
+    AgentRuntimeNotFoundError,
+    AgentRuntimeNotReadyError,
+    AgentRuntimeTimeoutError,
+    AgentRuntimeUnavailableError,
+)
 from openhands.core.logger import get_console_handler
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import Action
@@ -306,13 +316,20 @@ def _process_instance_wrapper(
    timeout_seconds: int | None = None,
 ) -> EvalOutput:
    """Wrap the process_instance_func to handle retries and errors."""
+    runtime_failure_count = 0
    for attempt in range(max_retries + 1):
        try:
+            kwargs = {}
+            # check if process_instance_func accepts runtime_failure_count parameter
+            sig = signature(process_instance_func)
+            if 'runtime_failure_count' in sig.parameters:
+                kwargs['runtime_failure_count'] = runtime_failure_count
+
            if timeout_seconds is not None:
                with timeout(timeout_seconds):
-                    result = process_instance_func(instance, metadata, use_mp)
+                    result = process_instance_func(instance, metadata, use_mp, **kwargs)
            else:
-                result = process_instance_func(instance, metadata, use_mp)
+                result = process_instance_func(instance, metadata, use_mp, **kwargs)
            return result
        except EvalTimeoutException as e:
            error = f'Timeout after {timeout_seconds} seconds'
@@ -358,6 +375,11 @@ def _process_instance_wrapper(
                + '-' * 10
                + '\n'
            )
+            if isinstance(
+                e, (AgentRuntimeDisconnectedError, AgentRuntimeUnavailableError)
+            ):
+                runtime_failure_count += 1
+                msg += f'Runtime disconnected error detected for instance {instance.instance_id}, runtime failure count: {runtime_failure_count}'
            logger.error(msg)
            if use_mp:
                print(msg)  # use print to directly print to console
@@ -503,3 +525,24 @@ def compatibility_for_eval_history_pairs(
        history_pairs.append((event_to_dict(action), event_to_dict(observation)))

    return history_pairs
+
+
+def is_fatal_evaluation_error(error: str | None) -> bool:
+    """Tell whether an error string names one of the agent-runtime exceptions.
+
+    Args:
+        error: error text to inspect; None or an empty string is never fatal.
+
+    Returns:
+        True (after logging the error) when the text contains the class name
+        of any AgentRuntime* exception listed below, False otherwise.
+    """
+    if not error:
+        return False
+
+    fatal_exception_names = (
+        AgentRuntimeError.__name__,
+        AgentRuntimeBuildError.__name__,
+        AgentRuntimeTimeoutError.__name__,
+        AgentRuntimeUnavailableError.__name__,
+        AgentRuntimeNotReadyError.__name__,
+        AgentRuntimeDisconnectedError.__name__,
+        AgentRuntimeNotFoundError.__name__,
+    )
+
+    # plain substring match on the class names; the first hit decides
+    for exception_name in fatal_exception_names:
+        if exception_name in error:
+            logger.error(f'Fatal evaluation error detected: {error}')
+            return True
+
+    return False
--- a/frontend/tests/components/browser.test.tsx
+++ b/frontend/tests/components/browser.test.tsx
@@ -1,16 +1,45 @@
+import { describe, it, expect, afterEach, vi } from "vitest";
+import * as router from "react-router";
+
+// Mock useParams before importing components
+vi.mock("react-router", async () => {
+  const actual = await vi.importActual("react-router");
+  return {
+    ...actual as object,
+    useParams: () => ({ conversationId: "test-conversation-id" }),
+  };
+});
+
+// Mock i18next
+vi.mock("react-i18next", async () => {
+  const actual = await vi.importActual("react-i18next");
+  return {
+    ...actual as object,
+    useTranslation: () => ({
+      t: (key: string) => key,
+      i18n: {
+        changeLanguage: () => new Promise(() => {}),
+      },
+    }),
+  };
+});
+
 import { screen } from "@testing-library/react";
-import { describe, it, expect } from "vitest";
 import { renderWithProviders } from "../../test-utils";
 import { BrowserPanel } from "#/components/features/browser/browser";


 describe("Browser", () => {
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
  it("renders a message if no screenshotSrc is provided", () => {
    renderWithProviders(<BrowserPanel />, {
      preloadedState: {
        browser: {
          url: "https://example.com",
          screenshotSrc: "",
+          updateCount: 0,
        },
      },
    });
@@ -26,6 +55,7 @@ describe("Browser", () => {
          url: "https://example.com",
          screenshotSrc:
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
+          updateCount: 0,
        },
      },
    });
--- a/frontend/tests/components/chat-message.test.tsx
+++ b/frontend/tests/components/chat-message.test.tsx
@@ -26,8 +26,6 @@ describe("ChatMessage", () => {
    expect(screen.getByText("'Hello, World!'")).toBeInTheDocument();
  });

-  it.todo("should support markdown content");
-
  it("should render the copy to clipboard button when the user hovers over the message", async () => {
    const user = userEvent.setup();
    render(<ChatMessage type="user" message="Hello, World!" />);
@@ -50,15 +48,8 @@ describe("ChatMessage", () => {
    expect(navigator.clipboard.readText()).resolves.toBe("Hello, World!");
  });

-  // BUG: vi.useFakeTimers() seems to break the tests
-  it.todo(
-    "should display a checkmark for 200ms and disable the button after copying content to clipboard",
-  );
-
  it("should display an error toast if copying content to clipboard fails", async () => {});

-  test.todo("push a toast after successfully copying content to clipboard");
-
  it("should render a component passed as a prop", () => {
    function Component() {
      return <div data-testid="custom-component">Custom Component</div>;
@@ -70,4 +61,17 @@ describe("ChatMessage", () => {
    );
    expect(screen.getByTestId("custom-component")).toBeInTheDocument();
  });
+
+  it("should apply correct styles to inline code", () => {
+    render(
+      <ChatMessage
+        type="assistant"
+        message="Here is some `inline code` text"
+      />,
+    );
+    const codeElement = screen.getByText("inline code");
+
+    expect(codeElement.tagName.toLowerCase()).toBe("code");
+    expect(codeElement.closest("article")).not.toBeNull();
+  });
 });
--- a/frontend/tests/components/chat/chat-input.test.tsx
+++ b/frontend/tests/components/chat/chat-input.test.tsx
@@ -51,6 +51,22 @@ describe("ChatInput", () => {
    expect(onSubmitMock).not.toHaveBeenCalled();
  });

+  it("should not call onSubmit when the message is only whitespace", async () => {
+    const user = userEvent.setup();
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+
+    await user.type(textarea, "   ");
+    await user.keyboard("{Enter}");
+
+    expect(onSubmitMock).not.toHaveBeenCalled();
+
+    await user.type(textarea, " \t\n");
+    await user.keyboard("{Enter}");
+
+    expect(onSubmitMock).not.toHaveBeenCalled();
+  });
+
  it("should disable submit", async () => {
    const user = userEvent.setup();
    render(<ChatInput disabled onSubmit={onSubmitMock} />);
--- a/frontend/tests/components/chat/chat-interface.test.tsx
+++ b/frontend/tests/components/chat/chat-interface.test.tsx
@@ -9,7 +9,7 @@ import { WsClientProviderStatus } from "#/context/ws-client-provider";
 import { ChatInterface } from "#/components/features/chat/chat-interface";

 // eslint-disable-next-line @typescript-eslint/no-unused-vars
-const renderChatInterface = (messages: (Message)[]) =>
+const renderChatInterface = (messages: Message[]) =>
  renderWithProviders(<ChatInterface />);

 describe("Empty state", () => {
@@ -20,7 +20,7 @@ describe("Empty state", () => {
  const { useWsClient: useWsClientMock } = vi.hoisted(() => ({
    useWsClient: vi.fn(() => ({
      send: sendMock,
-      status: WsClientProviderStatus.ACTIVE,
+      status: WsClientProviderStatus.CONNECTED,
      isLoadingMessages: false,
    })),
  }));
@@ -90,7 +90,7 @@ describe("Empty state", () => {
      // this is to test that the message is in the UI before the socket is called
      useWsClientMock.mockImplementation(() => ({
        send: sendMock,
-        status: WsClientProviderStatus.ACTIVE,
+        status: WsClientProviderStatus.CONNECTED,
        isLoadingMessages: false,
      }));
      const addUserMessageSpy = vi.spyOn(ChatSlice, "addUserMessage");
@@ -120,7 +120,7 @@ describe("Empty state", () => {
    async () => {
      useWsClientMock.mockImplementation(() => ({
        send: sendMock,
-        status: WsClientProviderStatus.ACTIVE,
+        status: WsClientProviderStatus.CONNECTED,
        isLoadingMessages: false,
      }));
      const user = userEvent.setup();
@@ -138,7 +138,7 @@ describe("Empty state", () => {

      useWsClientMock.mockImplementation(() => ({
        send: sendMock,
-        status: WsClientProviderStatus.ACTIVE,
+        status: WsClientProviderStatus.CONNECTED,
        isLoadingMessages: false,
      }));
      rerender(<ChatInterface />);
@@ -195,7 +195,7 @@ describe.skip("ChatInterface", () => {
    expect(screen.getByTestId("chat-input")).toBeInTheDocument();
  });

-  it.todo("should call socket send when submitting a message", async () => {
+  it("should call socket send when submitting a message", async () => {
    const user = userEvent.setup();
    const messages: Message[] = [];
    renderChatInterface(messages);
@@ -240,8 +240,6 @@ describe.skip("ChatInterface", () => {
    );
  });

-  it.todo("should render confirmation buttons");
-
  it("should render a 'continue' action when there are more than 2 messages and awaiting user input", () => {
    const messages: Message[] = [
      {
@@ -278,7 +276,7 @@ describe.skip("ChatInterface", () => {
  });

  it("should render inline errors", () => {
-    const messages: (Message)[] = [
+    const messages: Message[] = [
      {
        sender: "assistant",
        content: "Hello",
@@ -402,12 +400,4 @@ describe.skip("ChatInterface", () => {

    expect(screen.getByTestId("feedback-actions")).toBeInTheDocument();
  });
-
-  describe("feedback", () => {
-    it.todo("should open the feedback modal when a feedback action is clicked");
-    it.todo(
-      "should submit feedback and hide the actions when feedback is shared",
-    );
-    it.todo("should render the actions once more after new messages are added");
-  });
 });
--- a/frontend/tests/components/chat/expandable-message.test.tsx
+++ b/frontend/tests/components/chat/expandable-message.test.tsx
@@ -0,0 +1,84 @@
+import { describe, expect, it } from "vitest";
+import { screen } from "@testing-library/react";
+import { renderWithProviders } from "test-utils";
+import { ExpandableMessage } from "#/components/features/chat/expandable-message";
+import { vi } from 'vitest';
+
+// Stub react-i18next's useTranslation so that `t` returns the key unchanged;
+// rendered text can then be asserted against translation keys directly.
+vi.mock('react-i18next', async () => {
+  const actual = await vi.importActual('react-i18next');
+  return {
+    // keep every other export of the real module
+    ...actual,
+    useTranslation: () => ({
+      t: (key:string) => key,
+      i18n: {
+        // returns a promise that never settles
+        changeLanguage: () => new Promise(() => {}),
+        language: 'en',
+        // every key is reported as existing
+        exists: () => true,
+      },
+    }),
+  }
+});
+
+describe("ExpandableMessage", () => {
+  // Selector for the bordered wrapper element that surrounds the message text.
+  const CONTAINER_SELECTOR = "div.flex.gap-2.items-center.justify-start";
+
+  it("should render with neutral border for non-action messages", () => {
+    renderWithProviders(<ExpandableMessage message="Hello" type="thought" />);
+    const element = screen.getByText("Hello");
+    const container = element.closest(CONTAINER_SELECTOR);
+    expect(container).toHaveClass("border-neutral-300");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+
+  // Title corrected: the assertion below expects the danger border, not the neutral one.
+  it("should render with danger border for error messages", () => {
+    renderWithProviders(<ExpandableMessage message="Error occurred" type="error" />);
+    const element = screen.getByText("Error occurred");
+    const container = element.closest(CONTAINER_SELECTOR);
+    expect(container).toHaveClass("border-danger");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+
+  it("should render with success icon for successful action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        id="OBSERVATION_MESSAGE$RUN"
+        message="Command executed successfully"
+        type="action"
+        success={true}
+      />
+    );
+    // the mocked `t` returns the key, so the id itself is the rendered headline
+    const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
+    const container = element.closest(CONTAINER_SELECTOR);
+    expect(container).toHaveClass("border-neutral-300");
+    const icon = screen.getByTestId("status-icon");
+    expect(icon).toHaveClass("fill-success");
+  });
+
+  it("should render with error icon for failed action messages", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        id="OBSERVATION_MESSAGE$RUN"
+        message="Command failed"
+        type="action"
+        success={false}
+      />
+    );
+    const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
+    const container = element.closest(CONTAINER_SELECTOR);
+    expect(container).toHaveClass("border-neutral-300");
+    const icon = screen.getByTestId("status-icon");
+    expect(icon).toHaveClass("fill-danger");
+  });
+
+  it("should render with neutral border and no icon for action messages without success prop", () => {
+    renderWithProviders(
+      <ExpandableMessage
+        id="OBSERVATION_MESSAGE$RUN"
+        message="Running command"
+        type="action"
+      />
+    );
+    const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
+    const container = element.closest(CONTAINER_SELECTOR);
+    expect(container).toHaveClass("border-neutral-300");
+    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
+  });
+});
--- a/frontend/tests/components/features/waitlist-modal.test.tsx
+++ b/frontend/tests/components/features/waitlist-modal.test.tsx
@@ -1,10 +1,18 @@
 import { render, screen } from "@testing-library/react";
-import { it, describe, expect, vi } from "vitest";
+import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { WaitlistModal } from "#/components/features/waitlist/waitlist-modal";
 import * as CaptureConsent from "#/utils/handle-capture-consent";

 describe("WaitlistModal", () => {
+  beforeAll(() => {
+    vi.stubGlobal("location", { href: "" });
+  });
+
+  afterAll(() => {
+    vi.unstubAllGlobals();
+  });
+
  it("should render a tos checkbox that is unchecked by default", () => {
    render(<WaitlistModal ghToken={null} githubAuthUrl={null} />);
    const checkbox = screen.getByRole("checkbox");
--- a/frontend/tests/components/feedback-form.test.tsx
+++ b/frontend/tests/components/feedback-form.test.tsx
@@ -1,6 +1,17 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import * as router from "react-router";
+
+// Mock useParams before importing components
+vi.mock("react-router", async () => {
+  const actual = await vi.importActual("react-router");
+  return {
+    ...actual as object,
+    useParams: () => ({ conversationId: "test-conversation-id" }),
+  };
+});
+
 import { screen } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
-import { afterEach, describe, expect, it, vi } from "vitest";
 import { renderWithProviders } from "test-utils";
 import { FeedbackForm } from "#/components/features/feedback/feedback-form";

--- a/frontend/tests/components/file-explorer/explorer-tree.test.tsx
+++ b/frontend/tests/components/file-explorer/explorer-tree.test.tsx
@@ -25,10 +25,4 @@ describe.skip("ExplorerTree", () => {
    expect(screen.queryByText("folder-1-2")).toBeInTheDocument();
    // TODO: make sure children don't render
  });
-
-  it.todo("should render all children as collapsed when defaultOpen is false");
-
-  it.todo(
-    "should maintain the expanded state of child folders when closing and opening a parent folder",
-  );
 });
--- a/frontend/tests/components/file-explorer/file-explorer.test.tsx
+++ b/frontend/tests/components/file-explorer/file-explorer.test.tsx
@@ -3,7 +3,7 @@ import userEvent from "@testing-library/user-event";
 import { renderWithProviders } from "test-utils";
 import { describe, it, expect, vi, Mock, afterEach } from "vitest";
 import toast from "#/utils/toast";
-import AgentState from "#/types/agent-state";
+import { AgentState } from "#/types/agent-state";
 import OpenHands from "#/api/open-hands";
 import { FileExplorer } from "#/components/features/file-explorer/file-explorer";

@@ -37,8 +37,6 @@ describe.skip("FileExplorer", () => {
    expect(getFilesSpy).toHaveBeenCalledTimes(1); // once for root
  });

-  it.todo("should render an empty workspace");
-
  it("should refetch the workspace when clicking the refresh button", async () => {
    const user = userEvent.setup();
    renderFileExplorerWithRunningAgentState();
@@ -87,14 +85,6 @@ describe.skip("FileExplorer", () => {
    expect(getFilesSpy).toHaveBeenCalled();
  });

-  it.todo("should upload files when dragging them to the explorer", () => {
-    // It will require too much work to mock drag logic, especially for our case
-    // https://github.com/testing-library/user-event/issues/440#issuecomment-685010755
-    // TODO: should be tested in an e2e environment such as Cypress/Playwright
-  });
-
-  it.todo("should download a file");
-
  it("should display an error toast if file upload fails", async () => {
    (uploadFilesSpy as Mock).mockRejectedValue(new Error());
    const user = userEvent.setup();
--- a/frontend/tests/components/interactive-chat-box.test.tsx
+++ b/frontend/tests/components/interactive-chat-box.test.tsx
@@ -1,4 +1,4 @@
-import { render, screen, within } from "@testing-library/react";
+import { render, screen, within, fireEvent } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
 import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
@@ -131,4 +131,60 @@ describe("InteractiveChatBox", () => {
    await user.click(stopButton);
    expect(onStopMock).toHaveBeenCalledOnce();
  });
+
+  it("should handle image upload and message submission correctly", async () => {
+    const user = userEvent.setup();
+    const onSubmit = vi.fn();
+    const onStop = vi.fn();
+    const onChange = vi.fn();
+
+    const { rerender } = render(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value="test message"
+      />
+    );
+
+    // Upload an image via the upload button - this should NOT clear the text input
+    const file = new File(["dummy content"], "test.png", { type: "image/png" });
+    const input = screen.getByTestId("upload-image-input");
+    await user.upload(input, file);
+
+    // Verify text input was not cleared
+    expect(screen.getByRole("textbox")).toHaveValue("test message");
+    expect(onChange).not.toHaveBeenCalledWith("");
+
+    // Submit the message with image
+    const submitButton = screen.getByRole("button", { name: "Send" });
+    await user.click(submitButton);
+
+    // Verify onSubmit was called with the message and image
+    expect(onSubmit).toHaveBeenCalledWith("test message", [file]);
+
+    // Verify onChange was called to clear the text input
+    expect(onChange).toHaveBeenCalledWith("");
+
+    // Simulate parent component updating the value prop
+    rerender(
+      <InteractiveChatBox
+        onSubmit={onSubmit}
+        onStop={onStop}
+        onChange={onChange}
+        value=""
+      />
+    );
+
+    // Verify the text input was cleared
+    expect(screen.getByRole("textbox")).toHaveValue("");
+
+    // Upload another image - this should NOT clear the text input
+    onChange.mockClear();
+    await user.upload(input, file);
+
+    // Verify text input is still empty and onChange was not called
+    expect(screen.getByRole("textbox")).toHaveValue("");
+    expect(onChange).not.toHaveBeenCalled();
+  });
 });
--- a/frontend/tests/components/jupyter/jupyter.test.tsx
+++ b/frontend/tests/components/jupyter/jupyter.test.tsx
@@ -0,0 +1,45 @@
+import { render, screen } from "@testing-library/react";
+import { Provider } from "react-redux";
+import { configureStore } from "@reduxjs/toolkit";
+import { JupyterEditor } from "#/components/features/jupyter/jupyter";
+import { jupyterReducer } from "#/state/jupyter-slice";
+import { vi, describe, it, expect } from "vitest";
+
+describe("JupyterEditor", () => {
+  const mockStore = configureStore({
+    reducer: {
+      fileState: () => ({}),
+      initalQuery: () => ({}),
+      browser: () => ({}),
+      chat: () => ({}),
+      code: () => ({}),
+      cmd: () => ({}),
+      agent: () => ({}),
+      jupyter: jupyterReducer,
+      securityAnalyzer: () => ({}),
+      status: () => ({}),
+    },
+    preloadedState: {
+      jupyter: {
+        cells: Array(20).fill({
+          content: "Test cell content",
+          type: "input",
+          output: "Test output",
+        }),
+      },
+    },
+  });
+
+  it("should have a scrollable container", () => {
+    render(
+      <Provider store={mockStore}>
+        <div style={{ height: "100vh" }}>
+          <JupyterEditor maxWidth={800} />
+        </div>
+      </Provider>
+    );
+
+    const container = screen.getByTestId("jupyter-container");
+    expect(container).toHaveClass("flex-1 overflow-y-auto");
+  });
+});
--- a/frontend/tests/components/modals/settings/model-selector.test.tsx
+++ b/frontend/tests/components/modals/settings/model-selector.test.tsx
@@ -109,11 +109,4 @@ describe("ModelSelector", () => {
    expect(screen.getByLabelText("LLM Provider")).toHaveValue("Azure");
    expect(screen.getByLabelText("LLM Model")).toHaveValue("ada");
  });
-
-  it.todo("should disable provider if isDisabled is true");
-
-  it.todo(
-    "should display the verified models in the correct order",
-    async () => {},
-  );
 });
--- a/frontend/tests/components/project-menu/project-menu-card.test.tsx
+++ b/frontend/tests/components/project-menu/project-menu-card.test.tsx
@@ -1,8 +0,0 @@
-import { describe, it } from "vitest";
-
-describe("PlayMenuCard", () => {
-  it.todo("should render the initial project title");
-  it.todo("should be able to edit the project title");
-  it.todo("should render the menu list items when clicking the ellipses");
-  it.todo("should close the menu list when clicking outside");
-});
--- a/frontend/tests/components/settings/ai-config-form.test.tsx
+++ b/frontend/tests/components/settings/ai-config-form.test.tsx
@@ -1,9 +0,0 @@
-import { describe, it } from "vitest";
-
-describe("AIConfigForm", () => {
-  it.todo("should render the AI config form");
-  it.todo("should toggle the advanced settings when clicked");
-  it.todo("should call the onSubmit callback when the form is submitted");
-  it.todo("should call the onReset callback when the reset button is clicked");
-  it.todo("should call the onClose callback when the close button is clicked");
-});
--- a/frontend/tests/components/settings/dropdown-input.test.tsx
+++ b/frontend/tests/components/settings/dropdown-input.test.tsx
@@ -1,9 +0,0 @@
-import { describe, it } from "vitest";
-
-describe("DropdownInput", () => {
-  it.todo("should render the input");
-  it.todo("should render the placeholder");
-  it.todo("should render the dropdown when clicked");
-  it.todo("should select an option when clicked");
-  it.todo("should filter the options when typing");
-});
--- a/frontend/tests/components/settings/model-selector.test.tsx
+++ b/frontend/tests/components/settings/model-selector.test.tsx
@@ -1,12 +0,0 @@
-import { describe, it } from "vitest";
-
-describe("ModelSelector", () => {
-  it.todo("should render the model selector");
-  it.todo("should display and select the providers");
-  it.todo("should display and select the models");
-  it.todo("should disable the models if a provider is not selected");
-  it.todo("should disable the inputs if isDisabled is true");
-  it.todo(
-    "should set the selected model and provider if the currentModel prop is set",
-  );
-});
--- a/frontend/tests/components/terminal/terminal.test.tsx
+++ b/frontend/tests/components/terminal/terminal.test.tsx
@@ -4,26 +4,6 @@ import { vi, describe, afterEach, it, expect } from "vitest";
 import { Command, appendInput, appendOutput } from "#/state/command-slice";
 import Terminal from "#/components/features/terminal/terminal";

-global.ResizeObserver = vi.fn().mockImplementation(() => ({
-  observe: vi.fn(),
-  disconnect: vi.fn(),
-}));
-
-const mockTerminal = {
-  open: vi.fn(),
-  write: vi.fn(),
-  writeln: vi.fn(),
-  dispose: vi.fn(),
-  onKey: vi.fn(),
-  attachCustomKeyEventHandler: vi.fn(),
-  loadAddon: vi.fn(),
-};
-
-vi.mock("@xterm/xterm", async (importOriginal) => ({
-  ...(await importOriginal<typeof import("@xterm/xterm")>()),
-  Terminal: vi.fn().mockImplementation(() => mockTerminal),
-}));
-
 const renderTerminal = (commands: Command[] = []) =>
  renderWithProviders(<Terminal secrets={[]} />, {
    preloadedState: {
@@ -34,6 +14,26 @@ const renderTerminal = (commands: Command[] = []) =>
  });

 describe.skip("Terminal", () => {
+  global.ResizeObserver = vi.fn().mockImplementation(() => ({
+    observe: vi.fn(),
+    disconnect: vi.fn(),
+  }));
+
+  const mockTerminal = {
+    open: vi.fn(),
+    write: vi.fn(),
+    writeln: vi.fn(),
+    dispose: vi.fn(),
+    onKey: vi.fn(),
+    attachCustomKeyEventHandler: vi.fn(),
+    loadAddon: vi.fn(),
+  };
+
+  vi.mock("@xterm/xterm", async (importOriginal) => ({
+    ...(await importOriginal<typeof import("@xterm/xterm")>()),
+    Terminal: vi.fn().mockImplementation(() => mockTerminal),
+  }));
+
  afterEach(() => {
    vi.clearAllMocks();
  });
--- a/frontend/tests/hooks/use-terminal.test.tsx
+++ b/frontend/tests/hooks/use-terminal.test.tsx
@@ -5,7 +5,6 @@ import { ReactNode } from "react";
 import { useTerminal } from "#/hooks/use-terminal";
 import { Command } from "#/state/command-slice";

-
 interface TestTerminalComponentProps {
  commands: Command[];
  secrets: string[];
@@ -15,7 +14,7 @@ function TestTerminalComponent({
  commands,
  secrets,
 }: TestTerminalComponentProps) {
-  const ref = useTerminal(commands, secrets);
+  const ref = useTerminal({ commands, secrets, disabled: false });
  return <div ref={ref} />;
 }

@@ -24,9 +23,7 @@ interface WrapperProps {
 }

 function Wrapper({ children }: WrapperProps) {
-  return (
-    <div>{children}</div>
-  )
+  return <div>{children}</div>;
 }

 describe("useTerminal", () => {
--- a/frontend/tests/routes/_oh.app.test.tsx
+++ b/frontend/tests/routes/_oh.app.test.tsx
@@ -1,5 +0,0 @@
-import { describe, it } from "vitest";
-
-describe("App", () => {
-  it.todo("should render");
-});
--- a/frontend/tests/routes/_oh.test.tsx
+++ b/frontend/tests/routes/_oh.test.tsx
@@ -1,4 +1,5 @@
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
+import * as router from "react-router";
 import { createRoutesStub } from "react-router";
 import { screen, waitFor, within } from "@testing-library/react";
 import { renderWithProviders } from "test-utils";
@@ -39,12 +40,6 @@ describe("frontend/routes/_oh", () => {
    await screen.findByTestId("root-layout");
  });

-  it("should render the AI config modal if the user is authed", async () => {
-    // Our mock return value is true by default
-    renderWithProviders(<RouteStub />);
-    await screen.findByTestId("ai-config-modal");
-  });
-
  it("should render the AI config modal if settings are not up-to-date", async () => {
    settingsAreUpToDateMock.mockReturnValue(false);
    renderWithProviders(<RouteStub />);
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
openhands	2959abf4ba	Add missing imports to ActionExecutionClient	2024-12-25 15:54:31 +00:00
openhands	0125a5415f	Move all run_action logic to ActionExecutionClient	2024-12-25 15:52:08 +00:00
openhands	65de07299f	Refactor runtime action execution - Create ActionExecutionClient base class for shared HTTP server interaction logic - Update EventStreamRuntime and RemoteRuntime to inherit from ActionExecutionClient - Remove duplicate code and clean up imports - Update ModalRuntime and RunloopRuntime to use super().__init__()	2024-12-25 15:47:02 +00:00
Robert Brennan	642e962f89	randomize branch names (#5784 )	2024-12-24 15:28:27 -05:00
Robert Brennan	d4e670a3e7	fix latest event id (#5789 )	2024-12-24 18:08:33 +00:00
Robert Brennan	f9cc0bce53	Fix connection check (#5787 )	2024-12-24 16:21:31 +00:00
dependabot[bot]	2c8b1ee136	chore(deps-dev): bump llama-index from 0.12.7 to 0.12.8 in the llama group (#5765 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-24 15:24:36 +00:00
Robert Brennan	31dda63f43	Don't enforce user IDs in oss mode (#5776 )	2024-12-24 06:30:33 -05:00
Boxuan Li	ecff5c67fb	Evaluation README: Add TheAgentCompany (#5777 )	2024-12-24 02:37:42 +00:00
mamoodi	725e71ad22	Update Slack links again (#5773 )	2024-12-23 21:20:08 +00:00
OpenHands	200270ba8f	Fix issue #5752 : Install "jq" by default in OpenHands runtime (#5753 )	2024-12-23 16:16:36 -05:00
Robert Brennan	5bf55a0035	show most recent convo on homepage (#5769 )	2024-12-23 20:04:05 +00:00
Robert Brennan	96329190d1	Session fixes for HA mode (#5766 )	2024-12-23 18:07:56 +00:00
Robert Brennan	faf8b5829c	Fix for dying sessions/runtimes (#5755 )	2024-12-23 16:00:05 +00:00
sp.wack	d62cf7e731	refactor(frontend): Remove test todos and fix light warning (#5554 )	2024-12-23 18:43:36 +04:00
Engel Nyst	4a8bf3d2d0	Fix not initialized response latencies (#5679 )	2024-12-22 16:31:05 -05:00
Robert Brennan	2cfbd26df7	Fixes for VS Code Button (#5754 )	2024-12-22 16:27:30 -05:00
tofarr	b51dd3bc75	Fix stack trace in logs (#5751 )	2024-12-22 14:51:22 -05:00
Boxuan Li	b1719bb3db	Add TheAgentCompany evaluation harness (#5731 )	2024-12-22 14:12:30 -05:00
Rohit Malhotra	ee5f49afc1	[Bug]: Missing path import (#5747 )	2024-12-22 15:58:17 +00:00
Rohit Malhotra	7fe692a7bd	Revert "[Resolver]: Add target branch param" (#5743 )	2024-12-22 01:28:23 +00:00
OpenHands	21948fa81b	Fix issue #5735 : [Bug]: Inconsistent command line arguments in evaluation directory (#5736 )	2024-12-22 04:41:39 +08:00
Robert Brennan	d646b2089d	Fix several async lockups (#5734 )	2024-12-21 19:07:31 +00:00
Robert Brennan	f54d953fe1	Fix unclosed github client (#5733 )	2024-12-21 13:51:37 -05:00
Rohit Malhotra	4e7af78b39	Fix missing closing brace in openhands-resolver.yml (#5729 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-21 15:22:41 +00:00
Rohit Malhotra	252c70984c	[Resolver]: Rename success_explanation to result_explanation for better clarity (#5724 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-21 01:31:05 +00:00
Rohit Malhotra	5ea096e95b	[Resolver]: Add target branch param (#5642 )	2024-12-21 00:33:45 +00:00
Robert Brennan	a01fb9dca3	Fixes for listing files, clean up references to tokens (#5718 )	2024-12-20 23:13:14 +00:00
Rohit Malhotra	51af29208f	[Resolver]: Indicating more informative failures (#5685 )	2024-12-20 17:22:24 -05:00
mamoodi	e77f435901	Add note about custom configurations (#5721 )	2024-12-20 17:20:11 -05:00
mamoodi	5fb0eec61e	Fix resolver workflow and update docs (#5713 )	2024-12-20 15:59:13 -05:00
Rohit Malhotra	4af84a29dc	Adding more resilience to refresh token logic (#5704 )	2024-12-20 14:37:04 -05:00
Ryan H. Tran	7a0488c012	Use more specific action types for openhands-aci commands (#5508 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2024-12-21 02:48:27 +08:00
Xingyao Wang	581d5ec7a8	feat(eval): increase resource factor for remote runtime when previous run failed due to resource (#5709 )	2024-12-21 01:47:06 +08:00
Xingyao Wang	cfbe77b367	fix: only register atexit when EventStreamRuntime is initialized (#5712 )	2024-12-20 16:29:45 +00:00
sp.wack	3236602919	fix(frontend): Create a conversation without a query (#5711 )	2024-12-20 16:24:30 +00:00
dependabot[bot]	aa2f34a1f5	chore(deps-dev): bump llama-index from 0.12.6 to 0.12.7 in the llama group (#5708 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-20 17:16:32 +01:00
Robert Brennan	73c38f1163	refactor: move session initialization from WebSocket to REST API (#5493 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>	2024-12-20 15:50:09 +00:00
dependabot[bot]	0dd919bacf	Bump prism-react-renderer from 2.4.0 to 2.4.1 in /docs in the version-all group (#5668 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-20 16:43:12 +04:00
d-walsh	5ad361623d	feat: add support for custom PR titles (#5706 ) Co-authored-by: David Walsh <walsha@gmail.com>	2024-12-20 04:00:00 +00:00
Xingyao Wang	c333938384	feat(eval): add standard error to swebench summarize outputs (#5700 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-20 08:39:43 +08:00
tofarr	ebf3bf606a	Settings store type is defined in openhands_config rather than main config (#5701 )	2024-12-19 12:44:35 -07:00
dependabot[bot]	c2293ad1dd	Bump the version-all group across 1 directory with 13 updates (#5699 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-19 20:08:22 +01:00
mamoodi	6f7d054385	Add examples for filesystem use (#5697 )	2024-12-19 13:13:09 -05:00
Xingyao Wang	e9cafb0372	chore: Cleanup runtime exception handling (#5696 )	2024-12-19 17:28:29 +00:00
mamoodi	13097f9d1d	Release 0.16.1 (#5693 )	2024-12-19 11:13:26 -05:00
OpenHands	2a66439ca6	Fix issue #5676 : [Bug]: Frontend Hyperlink in Chat window should open link in a new tab (#5677 ) Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>	2024-12-19 14:39:00 +00:00
Rohit Malhotra	3876f4a59c	[Bug]: Prevent selection of "add more repo" option in dropdown (#5688 )	2024-12-19 16:00:10 +04:00
Rohit Malhotra	3db118f3d9	[Bug]: Fixing next page param extraction for app installation repos reponse (#5687 )	2024-12-19 03:29:22 +00:00
tofarr	fe1bb1c233	Feat config server side store (#5594 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-18 15:18:56 -07:00
mamoodi	154ef7391a	Release 0.16.0 (#5678 )	2024-12-18 16:31:38 -05:00
tofarr	5498ca1f8b	Fix: Agent gets stuck in closing and server won't die (#5675 )	2024-12-18 18:47:27 +00:00
Xingyao Wang	2cc6a51fe8	chore: cleanup log - make "cannot restore state" a debug message (#5674 )	2024-12-18 18:43:28 +00:00
dependabot[bot]	409d132747	Bump llama-index from 0.12.5 to 0.12.6 in the llama group (#5669 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-18 19:06:31 +01:00
Rohit Malhotra	2c47a1b33f	[Bug]: Settings modal opens on every refresh (#5670 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-18 12:32:21 -05:00
Xingyao Wang	8983eb4cc1	fix(eval): Increase RemoteRuntime request timeouts to cope with busy cluster (#5671 )	2024-12-18 17:10:38 +00:00
Robert Brennan	bd3e38fe67	Implement file-by-file download with progress (#5008 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-18 08:37:43 -05:00
Calvin Smith	8488dd2a03	fix: Gracefully handling negative response latencies (#5660 ) Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2024-12-18 01:43:41 +01:00
Rohit Malhotra	d16842f413	[Bug][Resolver]: Skip bot comment when PR is updated (#5628 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-17 16:53:17 -05:00
Xingyao Wang	9cdb8d06c0	fix(eval): Use cp -r instead of mv for SWE-Bench Initialization (#5659 )	2024-12-17 21:21:27 +00:00
Engel Nyst	3297e4d5a8	Use litellm's modify params (#5636 )	2024-12-17 21:32:49 +01:00
Rohit Malhotra	f9d052c493	[Refactor]: Changes to Github Authentication (#5371 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-17 15:13:40 -05:00
dependabot[bot]	dc3e43b999	Bump the version-all group with 4 updates (#5649 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-17 21:08:48 +01:00
Robert Brennan	8bd2205258	Fix expandable messages (#5650 ) Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2024-12-17 15:03:45 -05:00
Robert Brennan	6ae84bf992	fix session leak (#5656 )	2024-12-17 14:38:33 -05:00
mamoodi	afea9f4bec	Release 0.15.3 (#5654 )	2024-12-17 18:59:45 +00:00
sp.wack	8b1a7dff7e	fix(frontend): Prevent from setting multiple terminal stdin listeners on re-renders (#5653 )	2024-12-17 17:57:34 +00:00
Ryan H. Tran	5e3123964f	Downgrade openhands-aci to 0.1.2 (#5646 )	2024-12-17 15:44:39 +01:00
Motin	1ffd66f62e	chore: Add .nvmrc for automatic selection of Node v20 for nvm users (#5624 )	2024-12-17 14:04:24 +00:00
sp.wack	b04ec03062	fix(frontend): Disable terminal stdin if the runtime is starting up (#5625 )	2024-12-17 11:57:19 +04:00
Frank Xu	ee8438cd59	remove unused prompts from legacy browsing agent code (#5643 )	2024-12-17 14:00:40 +08:00
Rohit Malhotra	7071742d4a	[Bug][Resolver]: Multiline comments crashing resolver (#5641 )	2024-12-16 21:31:14 -05:00
tofarr	d76e83b55e	Fix: Mocking LLM proxy in unit tests (#5639 )	2024-12-16 15:59:41 -07:00
OpenHands	239619a0a1	Fix issue #5633 : [Bug]: Decimal points in maximum iteration messages not needed (#5635 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2024-12-16 17:54:37 -05:00
Robert Brennan	50478c7d21	fix: wrap multi-line code blocks in pre tag (#5586 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-16 16:30:06 -05:00
OpenHands	4998b5de32	Fix issue #5559 : The turn limit should be measured from the last user interaction (#5560 ) Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-16 16:28:23 -05:00
sp.wack	dd79acdae1	Fix vscode url fetch regression (#5632 )	2024-12-16 20:48:12 +00:00
Engel Nyst	b295f5775c	Revert "Fix issue #5609 : Use litellm's modify_params with default True" (#5631 )	2024-12-16 20:39:57 +00:00
sp.wack	dabf0ce3af	fix(frontend): Trigger VSCode URL query only when runtime is active (#5622 )	2024-12-16 14:31:57 -05:00
OpenHands	09735c7869	Fix issue #5609 : Use litellm's modify_params with default True (#5611 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-16 20:18:45 +01:00
Xingyao Wang	e0b231092a	feat: support directly stream logs from container to stdout in debug mode (#5408 )	2024-12-16 19:17:57 +00:00
Xingyao Wang	d6a2c4b167	runtime(eval): increase runtime status request timeout for sessions (#5619 )	2024-12-17 00:51:08 +08:00
dependabot[bot]	6db32025b4	bump the version-all group across 1 directory with 26 updates (#5630 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-16 16:46:06 +00:00
OpenHands	fdc00fbca0	Fix issue #5605 : [Bug]: UI regression, Jupyter tab has no vertical scroll bar, cannot see all actions (#5607 )	2024-12-16 09:20:36 -05:00
sp.wack	08b1031666	fix(frontend): Prevent from submitting empty characters (#5545 )	2024-12-16 07:33:26 +00:00
Engel Nyst	ad822a31e1	Fix issue #5591 : Clean up unused code (#5592 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-16 08:22:21 +01:00
Engel Nyst	590ebb6e47	Small fix and addition for token counting (#5550 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-15 15:12:05 +01:00
Engel Nyst	4716955960	Remove unused codeact-SWE agent (#5600 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-14 20:49:44 +01:00
Engel Nyst	f0257c793b	fix: allow to continue when the agent is stuck in interactive mode (#5597 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-14 20:49:04 +01:00
Calvin Smith	7ef6fa666d	feat(eval): Response Latency Tracking (#5588 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2024-12-13 22:51:13 +01:00
Robert Brennan	e0626a5741	Remove docker layer caching (#5587 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-13 16:13:26 -05:00
Robert Brennan	deb2d330b6	Reset iteration budget and update default max_iterations to 500 (#5590 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-13 20:46:40 +00:00
Engel Nyst	d733bc6bdd	fix: serialize tool calls (#5553 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-13 20:51:03 +01:00
tofarr	d782bdf691	Persist jwt_secret in config file (#5353 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-13 11:05:19 -07:00
OpenHands	9c950f499e	[Resolver] Reorder and comment workflow conditions (#5446 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-13 11:07:37 -05:00
Ryan H. Tran	8ae2fb636e	Remove symlink use for swebench setup (#5549 )	2024-12-13 22:18:14 +08:00
sp.wack	de75bd0690	fix(frontend): Prevent VSCode from opening when remounting (#5544 )	2024-12-13 09:35:34 +04:00
tofarr	2fb45d410d	Fix: Making the logs quieter (#5525 )	2024-12-12 19:36:13 -07:00
mamoodi	8300cf0436	Specify unsupported paths for installing OpenHands (#5540 )	2024-12-12 16:26:18 -05:00
mamoodi	7dd2bc569f	Restart troubleshooting documentation. (#5317 )	2024-12-12 15:49:18 -05:00
Robert Brennan	6e1fae29c9	Add note about design partner program to README (#5570 )	2024-12-12 20:13:07 +00:00
sp.wack	19525a487c	fix(frontend): Trim settings data when setting to storage (#5567 )	2024-12-12 22:36:17 +04:00
Engel Nyst	7d0405282a	Apply context window truncation for certain bad requests (#5566 )	2024-12-12 18:11:59 +00:00
OpenHands	92c166551f	Fix issue #5563 : [Bug]: Prompt is not deleted when the user sends a message (#5564 )	2024-12-12 10:06:40 -08:00
Xingyao Wang	ebb68b33db	Fix issue #5527 : Document repository customization and micro-agents (#5528 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-12 09:47:28 -08:00
sp.wack	37c46f1ed8	fix(frontend): Prevent push message from being rendered twice (#5546 )	2024-12-12 09:19:48 -08:00
Engel Nyst	ac5190c283	Add voyage ai embeddings (#5547 )	2024-12-12 09:19:05 -08:00
dependabot[bot]	ed3916b79b	chore(deps-dev): bump @tanstack/eslint-plugin-query from 5.61.4 to 5.62.1 in /frontend in the eslint group (#5556 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-12 15:30:02 +00:00
mamoodi	27a647cd3e	Release 0.15.2 (#5552 )	2024-12-12 10:09:47 -05:00
sp.wack	42a536d450	Revert "chore(deps): bump the version-all group across 1 directory with 30 updates" (#5548 )	2024-12-12 13:48:57 +04:00
dependabot[bot]	41e564dc41	chore(deps): bump the version-all group across 1 directory with 30 updates (#5522 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2024-12-12 12:18:26 +04:00
Graham Neubig	e979f51ea5	Fix chat input not clearing after image paste/drop (#5342 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-11 22:18:38 -08:00
Engel Nyst	425ccb0fbb	Clean up empty content fix (revert #4935 ) (#5539 )	2024-12-12 02:48:06 +00:00
Cheng Yang	7e4c1c733b	feat(sandbox): add support for extra Docker build arguments (#5447 )	2024-12-12 10:21:46 +08:00
Engel Nyst	ffd472d6b8	Update litellm (#5520 )	2024-12-12 03:12:50 +01:00
mamoodi	2f2ea9ec91	Update the doc for headless to include no continue (#5537 )	2024-12-12 02:03:06 +00:00
OpenHands	6a6ce5f3ee	Fix issue #5478 : Add color to the line next to "Ran a XXX Command" based on return value (#5483 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2024-12-11 23:20:29 +00:00
Xingyao Wang	907c65cc00	chore: add back accidentally removed repo info (#5532 )	2024-12-12 05:51:05 +08:00
tofarr	a6d1a4c98f	Fix: Redis listener attached at startup (#5516 )	2024-12-11 09:39:57 -05:00
Robert Brennan	a60ee09881	Add docker layer caching to ghcr build (#5517 )	2024-12-11 09:39:09 -05:00
Graham Neubig	246107c618	Parallize Python Unit tests (#5499 )	2024-12-11 01:05:29 -08:00
Robert Brennan	5fa18511b3	minor fixes for when commands time out (#5518 )	2024-12-10 21:55:02 +00:00
Rohit Malhotra	a482182a9e	Remove Beta label from Browser tab (#5484 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-10 15:10:00 -05:00
tofarr	58d22a1905	Fix for issue where double scroll hides save button (#5488 )	2024-12-10 19:50:49 +00:00
dependabot[bot]	17bbfa29a1	chore(deps): bump react-use from 17.5.1 to 17.6.0 in /docs in the version-all group (#5505 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-10 23:40:20 +04:00
tofarr	5fe116cfb1	Make layout responsive for mobile devices (#5475 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-11 03:31:20 +08:00
Xingyao Wang	e9637d40b9	Add browser observations to chat interface (#5514 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-11 03:30:44 +08:00
OpenHands	6de177521f	Fix issue #5450 : In openhands-resolver.yml, request code review from the person who initiated the workflow (#5451 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2024-12-10 12:19:55 -05:00
Xingyao Wang	9d36b80b96	Fix duplicate search messages in web browsing actions (#5511 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-10 15:51:03 +00:00
Engel Nyst	b11e905988	Verify costs script (#5469 )	2024-12-10 14:20:53 +01:00
dependabot[bot]	39e5311233	chore(deps-dev): bump the llama group across 1 directory with 3 updates (#5503 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-12-10 07:00:45 +00:00
Engel Nyst	651ed1c3c8	Dependabot config for any browsergym-* package (#5501 )	2024-12-10 01:27:11 -05:00
tofarr	e27c2e9c99	Fix: Auto-refresh file content when selected file changes (#5476 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-09 21:17:41 -05:00
Rohit Malhotra	cfe222e1d5	Fix issue #5162 : docs: Improve GitHub token setup documentation in UI… (#5491 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-09 21:14:55 -05:00
tofarr	c872af4658	Doc: Added troubleshooting section for Nebulous docker errors (#5482 )	2024-12-09 22:04:23 +00:00
OpenHands	99fa6c6a4a	Fix issue #5186 : [Bug]: Fix up inline code styles in chat window (#5226 ) Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2024-12-09 16:33:25 -05:00
OpenHands	3946f813a4	Fix issue #5471 : Resolver: LLM_MODEL should use "variable" instead of "secret" (#5477 )	2024-12-09 16:08:45 -05:00
Engel Nyst	455e667739	add cost to summary (#5473 )	2024-12-10 03:14:03 +08:00
Engel Nyst	2874041381	Fix stuck execution flow (#5458 )	2024-12-08 22:39:32 +01:00
Engel Nyst	279e1d7abc	Resolver minor tweaks (#5461 )	2024-12-08 12:34:01 -05:00
Graham Neubig	a7e4a7aa63	Improve error message when issue/PR not found in resolver (#5455 ) Co-authored-by: openhands <openhands@all-hands.dev>	2024-12-07 23:34:55 -05:00
Engel Nyst	2466d903df	Update version (#5459 )	2024-12-07 18:59:46 -05:00
Cheng Yang	424cdf121a	Feat/better log: Add colorize function and TermColor enum for text coloring (#5410 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-12-07 16:30:40 -05:00
Graham Neubig	6972f4806f	Update resolver README.md to fix repo location (#5454 )	2024-12-07 21:02:45 +00:00
Graham Neubig	78cc552e3a	Fix syntax in external openhands-resolver.yml (#5453 )	2024-12-07 20:46:20 +00:00
				`@@ -1 +0,0 @@`
				`The files in this directory configure a development container for GitHub Codespaces.`