Fix issue #3325: '[Documentation]: config.toml options should be documented on the doc web site'

Update instructions for new version of eval runtime-api (#4128)
Update PR Template for better release notes (#4126)
2026-04-29 03:00:45 -04:00 · 2024-10-01 14:24:18 +00:00 · 2024-09-30 23:48:38 +00:00 · 2024-09-30 17:06:56 -04:00 · 2024-09-30 18:59:57 +00:00 · 2024-10-01 02:40:23 +08:00
152 changed files with 3284 additions and 2928 deletions
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,4 +1,6 @@
-**Short description of the problem this fixes or functionality that this introduces. This may be used for the CHANGELOG**
+- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below.
+
+**End-user friendly description of the problem this fixes or functionality that this introduces**



--- a/.github/workflows/dummy-agent-test.yml
+++ b/.github/workflows/dummy-agent-test.yml
@@ -14,20 +14,38 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Install poetry via pipx
+        run: pipx install poetry
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
-      - name: Set up environment
-        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-          poetry install --without evaluation,llama-index
-          poetry run playwright install --with-deps chromium
-          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
+          cache: 'poetry'
+      - name: Install Python dependencies using Poetry
+        run: poetry install --without evaluation,llama-index
+      - name: Build Environment
+        run: make build
      - name: Run tests
        run: |
          set -e
-          poetry run python openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
+          poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
      - name: Check exit code
        run: |
          if [ $? -ne 0 ]; then
--- a/.github/workflows/ghcr_runtime.yml
+++ b/.github/workflows/ghcr_runtime.yml
@@ -25,7 +25,71 @@ on:
        required: true
        default: ''

+env:
+  BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST: nikolaik/python-nodejs:python3.11-nodejs22
+
 jobs:
+  # Builds the OpenHands Docker images
+  ghcr_build_app:
+    name: Build App Image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    outputs:
+      hash_from_app_image: ${{ steps.get_hash_in_app_image.outputs.hash_from_app_image }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+        with:
+          image: tonistiigi/binfmt:latest
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push app image
+        if: "!github.event.pull_request.head.repo.fork"
+        run: |
+          ./containers/build.sh -i openhands -o ${{ github.repository_owner }} --push
+      - name: Build app image
+        if: "github.event.pull_request.head.repo.fork"
+        run: |
+          ./containers/build.sh -i openhands -o ${{ github.repository_owner }} --load
+      - name: Get hash in App Image
+        id: get_hash_in_app_image
+        run: |
+          # Lowercase the repository owner
+          export REPO_OWNER=${{ github.repository_owner }}
+          REPO_OWNER=$(echo $REPO_OWNER | tr '[:upper:]' '[:lower:]')
+          # Run the build script in the app image
+          docker run -e SANDBOX_USER_ID=0 -v /var/run/docker.sock:/var/run/docker.sock ghcr.io/${REPO_OWNER}/openhands:${{ github.sha }} /bin/bash -c "mkdir -p containers/runtime; python3 openhands/runtime/utils/runtime_build.py --base_image ${{ env.BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST }} --build_folder containers/runtime --force_rebuild" 2>&1 | tee docker-outputs.txt
+          # Get the hash from the build script
+          hash_from_app_image=$(cat docker-outputs.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
+          echo "hash_from_app_image=$hash_from_app_image" >> $GITHUB_OUTPUT
+          echo "Hash from app image: $hash_from_app_image"
+
+
  # Builds the runtime Docker images
  ghcr_build_runtime:
    name: Build Image
@@ -56,7 +120,9 @@ jobs:
          docker-images: false
          swap-storage: true
      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+        uses: docker/setup-qemu-action@v3.0.0
+        with:
+          image: tonistiigi/binfmt:latest
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
@@ -88,7 +154,7 @@ jobs:
      - name: Build and push runtime image ${{ matrix.base_image.image }}
        if: github.event.pull_request.head.repo.fork != true
        run: |
-          ./containers/build.sh runtime ${{ github.repository_owner }} --push ${{ matrix.base_image.tag }}
+          ./containers/build.sh -i runtime -o ${{ github.repository_owner }} --push -t ${{ matrix.base_image.tag }}
      # Forked repos can't push to GHCR, so we need to upload the image as an artifact
      - name: Build runtime image ${{ matrix.base_image.image }} for fork
        if: github.event.pull_request.head.repo.fork
@@ -104,6 +170,56 @@ jobs:
          name: runtime-${{ matrix.base_image.tag }}
          path: /tmp/runtime-${{ matrix.base_image.tag }}.tar

+  verify_hash_equivalence_in_runtime_and_app:
+    name: Verify Hash Equivalence in Runtime and Docker images
+    runs-on: ubuntu-latest
+    needs: [ghcr_build_runtime, ghcr_build_app]
+    strategy:
+      fail-fast: false
+      matrix:
+        base_image: ['nikolaik']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Install Python dependencies using Poetry
+        run: make install-python-dependencies
+      - name: Get hash in App Image
+        run: |
+          echo "Hash from app image: ${{ needs.ghcr_build_app.outputs.hash_from_app_image }}"
+          echo "hash_from_app_image=${{ needs.ghcr_build_app.outputs.hash_from_app_image }}" >> $GITHUB_ENV
+
+      - name: Get hash using code (development mode)
+        run: |
+          mkdir -p containers/runtime
+          poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ env.BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST }} --build_folder containers/runtime --force_rebuild > output.txt 2>&1
+          hash_from_code=$(cat output.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
+          echo "hash_from_code=$hash_from_code" >> $GITHUB_ENV
+
+      - name: Compare hashes
+        run: |
+          echo "Hash from App Image: ${{ env.hash_from_app_image }}"
+          echo "Hash from Code: ${{ env.hash_from_code }}"
+          if [ "${{ env.hash_from_app_image }}" = "${{ env.hash_from_code }}" ]; then
+            echo "Hashes match!"
+          else
+            echo "Hashes do not match!"
+            exit 1
+          fi
+
  # Run unit tests with the EventStream runtime Docker images as root
  test_runtime_root:
    name: RT Unit Tests (Root)
@@ -115,6 +231,23 @@ jobs:
        base_image: ['nikolaik']
    steps:
      - uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
      # Forked repos can't push to GHCR, so we need to download the image as an artifact
      - name: Download runtime image for fork
        if: github.event.pull_request.head.repo.fork
@@ -145,8 +278,7 @@ jobs:
        run: make install-python-dependencies
      - name: Run runtime tests
        run: |
-          # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
-          # then across more than 2 CPUs for some reason
+          # We install pytest-xdist in order to run tests across CPUs
          poetry run pip install pytest-xdist

          # Install to be able to retry on failures for flaky tests
@@ -158,10 +290,10 @@ jobs:
          SKIP_CONTAINER_LOGS=true \
          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
-          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
+          SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          RUN_AS_OPENHANDS=false \
-          poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+          poetry run pytest -n 3 -raR --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        env:
@@ -177,6 +309,23 @@ jobs:
        base_image: ['nikolaik']
    steps:
      - uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
      # Forked repos can't push to GHCR, so we need to download the image as an artifact
      - name: Download runtime image for fork
        if: github.event.pull_request.head.repo.fork
@@ -207,8 +356,7 @@ jobs:
        run: make install-python-dependencies
      - name: Run runtime tests
        run: |
-          # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
-          # then across more than 2 CPUs for some reason
+          # We install pytest-xdist in order to run tests across CPUs
          poetry run pip install pytest-xdist

          # Install to be able to retry on failures for flaky tests
@@ -220,10 +368,10 @@ jobs:
          SKIP_CONTAINER_LOGS=true \
          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
-          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
+          SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          RUN_AS_OPENHANDS=true \
-          poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+          poetry run pytest -n 3 -raR --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        env:
@@ -240,6 +388,23 @@ jobs:
        base_image: ['nikolaik']
    steps:
      - uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
      # Forked repos can't push to GHCR, so we need to download the image as an artifact
      - name: Download runtime image for fork
        if: github.event.pull_request.head.repo.fork
@@ -275,7 +440,7 @@ jobs:

          TEST_RUNTIME=eventstream \
          SANDBOX_USER_ID=$(id -u) \
-          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
+          SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
          TEST_IN_CI=true \
          TEST_ONLY=true \
          ./tests/integration/regenerate.sh
@@ -292,7 +457,7 @@ jobs:
    name: All Runtime Tests Passed
    if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
    runs-on: ubuntu-latest
-    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
    steps:
      - name: All tests passed
        run: echo "All runtime tests have passed successfully!"
@@ -301,7 +466,7 @@ jobs:
    name: All Runtime Tests Passed
    if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
    runs-on: ubuntu-latest
-    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
    steps:
      - name: Some tests failed
        run: |
--- a/.github/workflows/ghcr_app.yml
+++ b/.github/workflows/ghcr_app.yml
@@ -1,65 +0,0 @@
-# Workflow that builds, tests and then pushes the app docker images to the ghcr.io repository
-name: Build and Publish App Image
-
-# Always run on "main"
-# Always run on tags
-# Always run on PRs
-# Can also be triggered manually
-on:
-  push:
-    branches:
-      - main
-    tags:
-      - '*'
-  pull_request:
-  workflow_dispatch:
-    inputs:
-      reason:
-        description: 'Reason for manual trigger'
-        required: true
-        default: ''
-
-jobs:
-  # Builds the OpenHands Docker images
-  ghcr_build:
-    name: Build App Image
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Free Disk Space (Ubuntu)
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: false
-          swap-storage: true
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-      - name: Login to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Build and push app image
-        if: "!github.event.pull_request.head.repo.fork"
-        run: |
-          ./containers/build.sh openhands ${{ github.repository_owner }} --push
-      - name: Build app image
-        if: "github.event.pull_request.head.repo.fork"
-        run: |
-          ./containers/build.sh openhands image ${{ github.repository_owner }}
--- a/.github/workflows/openhands-resolver.yml
+++ b/.github/workflows/openhands-resolver.yml
@@ -0,0 +1,13 @@
+name: Resolve Issues with OpenHands
+
+on:
+  issues:
+    types: [labeled]
+
+jobs:
+  call-openhands-resolver:
+    uses: All-Hands-AI/openhands-resolver/.github/workflows/openhands-resolver.yml@main
+    if: github.event.label.name == 'fix-me'
+    with:
+      issue_number: ${{ github.event.issue.number }}
+    secrets: inherit
--- a/.github/workflows/py-unit-tests.yml
+++ b/.github/workflows/py-unit-tests.yml
@@ -89,6 +89,9 @@ jobs:
          sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
      - name: Build Environment
        run: make build
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
      - name: Run Tests
        run: poetry run pytest --forked --cov=agenthub --cov=openhands --cov-report=xml ./tests/unit
      - name: Upload coverage to Codecov
@@ -107,6 +110,9 @@ jobs:
        python-version: ['3.11']
    steps:
      - uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
      - name: Install poetry via pipx
        run: pipx install poetry
      - name: Set up Python
--- a/.github/workflows/regenerate_integration_tests.yml
+++ b/.github/workflows/regenerate_integration_tests.yml
@@ -29,6 +29,9 @@ jobs:
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
+    - name: Set up Docker Buildx
+      id: buildx
+      uses: docker/setup-buildx-action@v3
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
--- a/.github/workflows/review-pr.yml
+++ b/.github/workflows/review-pr.yml
@@ -15,6 +15,9 @@ jobs:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
+    - name: Set up Docker Buildx
+      id: buildx
+      uses: docker/setup-buildx-action@v3
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
--- a/.github/workflows/solve-issue.yml
+++ b/.github/workflows/solve-issue.yml
@@ -1,113 +0,0 @@
-# Workflow that uses OpenHands to resolve a GitHub issue. Issue must be labeled 'solve-this'
-name: Use OpenHands to Resolve GitHub Issue
-
-on:
-  issues:
-    types: [labeled]
-
-permissions:
-  contents: write
-  pull-requests: write
-  issues: write
-
-jobs:
-  dogfood:
-    if: github.event.label.name == 'solve-this'
-    runs-on: ubuntu-latest
-    container:
-      image: ghcr.io/all-hands-ai/openhands
-      volumes:
-        - /var/run/docker.sock:/var/run/docker.sock
-    steps:
-    - name: install git, github cli
-      run: apt-get install -y git gh
-    - name: Checkout Repository
-      uses: actions/checkout@v4
-    - name: Write Task File
-      env:
-        ISSUE_TITLE: ${{ github.event.issue.title }}
-        ISSUE_BODY: ${{ github.event.issue.body }}
-      run: |
-        echo "TITLE:" > task.txt
-        echo "${ISSUE_TITLE}" >> task.txt
-        echo "" >> task.txt
-        echo "BODY:" >> task.txt
-        echo "${ISSUE_BODY}" >> task.txt
-    - name: Set up environment
-      run: |
-        curl -sSL https://install.python-poetry.org | python3 -
-        export PATH="/github/home/.local/bin:$PATH"
-        poetry install --without evaluation,llama-index
-        poetry run playwright install --with-deps chromium
-    - name: Run OpenHands
-      env:
-        ISSUE_TITLE: ${{ github.event.issue.title }}
-        ISSUE_BODY: ${{ github.event.issue.body }}
-        LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      run: |
-        # Append path to launch poetry
-        export PATH="/github/home/.local/bin:$PATH"
-        # Append path to correctly import package, note: must set pwd at first
-        export PYTHONPATH=$(pwd):$PYTHONPATH
-        WORKSPACE_MOUNT_PATH=$GITHUB_WORKSPACE poetry run python ./openhands/core/main.py -i 50 -f task.txt -d $GITHUB_WORKSPACE
-        rm task.txt
-    - name: Setup Git, Create Branch, and Commit Changes
-      run: |
-        # Setup Git configuration
-        git config --global --add safe.directory $PWD
-        git config --global user.name 'OpenHands'
-        git config --global user.email 'OpenHands@users.noreply.github.com'
-
-        # Create a unique branch name with a timestamp
-        BRANCH_NAME="fix/${{ github.event.issue.number }}-$(date +%Y%m%d%H%M%S)"
-
-        # Checkout new branch
-        git checkout -b $BRANCH_NAME
-
-        # Add all changes to staging, except task.txt
-        git add --all -- ':!task.txt'
-
-        # Commit the changes, if any
-        git commit -m "OpenHands: Resolve Issue #${{ github.event.issue.number }}"
-        if [ $? -ne 0 ]; then
-          echo "No changes to commit."
-          exit 0
-        fi
-
-        # Push changes
-        git push --set-upstream origin $BRANCH_NAME
-    - name: Fetch Default Branch
-      env:
-        GH_TOKEN: ${{ github.token }}
-      run: |
-        # Fetch the default branch using gh cli
-        DEFAULT_BRANCH=$(gh repo view --json defaultBranchRef --jq .defaultBranchRef.name)
-        echo "Default branch is $DEFAULT_BRANCH"
-        echo "DEFAULT_BRANCH=$DEFAULT_BRANCH" >> $GITHUB_ENV
-    - name: Generate PR
-      env:
-        GH_TOKEN: ${{ github.token }}
-      run: |
-        # Create PR and capture URL
-        PR_URL=$(gh pr create \
-          --title "OpenHands: Resolve Issue #2" \
-          --body "This PR was generated by OpenHands to resolve issue #2" \
-          --repo "foragerr/OpenHands" \
-          --head "${{ github.head_ref }}" \
-          --base "${{ env.DEFAULT_BRANCH }}" \
-          | grep -o 'https://github.com/[^ ]*')
-
-        # Extract PR number from URL
-        PR_NUMBER=$(echo "$PR_URL" | grep -o '[0-9]\+$')
-
-        # Set environment vars
-        echo "PR_URL=$PR_URL" >> $GITHUB_ENV
-        echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
-
-    - name: Post Comment
-      env:
-        GH_TOKEN: ${{ github.token }}
-      run: |
-        gh issue comment ${{ github.event.issue.number }} \
-          -b "OpenHands raised [PR #${{ env.PR_NUMBER }}](${{ env.PR_URL }}) to resolve this issue."
--- a/.gitignore
+++ b/.gitignore
@@ -217,8 +217,6 @@ config.toml
 config.toml_
 config.toml.bak

-containers/agnostic_sandbox
-
 # swe-bench-eval
 image_build_logs
 run_instance_logs
--- a/.openhands_instructions
+++ b/.openhands_instructions
@@ -0,0 +1,27 @@
+OpenHands is an automated AI software engineer. It is a repo with a Python backend
+(in the `openhands` directory) and TypeScript frontend (in the `frontend` directory).
+
+General Setup:
+- To set up the entire repo, including frontend and backend, run `make build`
+
+Backend:
+- Located in the `openhands` directory
+- Testing:
+  - All tests are in `tests/unit/test_*.py`
+  - To test new code, run `poetry run pytest tests/unit/test_xxx.py` where `xxx` is the appropriate file for the current functionality
+  - Write all tests with pytest
+
+Frontend:
+- Located in the `frontend` directory
+- Prerequisites: A recent version of NodeJS / NPM
+- Setup: Run `npm install` in the frontend directory
+- Testing:
+  - Run tests: `npm run test`
+  - To run specific tests: `npm run test -- -t "TestName"`
+- Building:
+  - Build for production: `npm run build`
+- Environment Variables:
+  - Set in `frontend/.env` or as environment variables
+  - Available variables: VITE_BACKEND_HOST, VITE_USE_TLS, VITE_INSECURE_SKIP_VERIFY, VITE_FRONTEND_PORT
+- Internationalization:
+  - Generate i18n declaration file: `npm run make-i18n`
--- a/2
+++ b/2
@@ -190,7 +190,7 @@ build-frontend:
 # Start backend
 start-backend:
 	@echo "$(YELLOW)Starting backend...$(RESET)"
-	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "workspace/*"
+	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "$(shell pwd)/workspace"

 # Start frontend
 start-frontend:
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@ system requirements and more information.
 ```bash
 export WORKSPACE_BASE=$(pwd)/workspace

+docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
+
 docker run -it --pull=always \
    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
@@ -56,6 +58,10 @@ docker run -it --pull=always \

 You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!

+You'll need a model provider and API key. One option that works well: [Claude 3.5 Sonnet](https://www.anthropic.com/api), but you have [many options](https://docs.all-hands.dev/modules/usage/llms).
+
+---
+
 You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
 or as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode).

--- a/agenthub/browsing_agent/browsing_agent.py
+++ b/agenthub/browsing_agent/browsing_agent.py
@@ -197,7 +197,7 @@ class BrowsingAgent(Agent):
                )
            except Exception as e:
                logger.error(
-                    f'Error when trying to process the accessibility tree: {e}'
+                    'Error when trying to process the accessibility tree: %s', e
                )
                return MessageAction('Error encountered when browsing.')

@@ -218,7 +218,6 @@ class BrowsingAgent(Agent):

        response = self.llm.completion(
            messages=self.llm.format_messages_for_llm(messages),
-            temperature=0.0,
            stop=[')```', ')\n```'],
        )
        return self.response_parser.parse(response)
--- a/agenthub/codeact_agent/README.md
+++ b/agenthub/codeact_agent/README.md
@@ -10,20 +10,3 @@ The conceptual idea is illustrated below. At each turn, the agent can:
   - Execute any valid `Python` code with [an interactive Python interpreter](https://ipython.org/). This is simulated through `bash` command, see plugin system below for more details.

 ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3)
-
-## Plugin System
-
-To make the CodeAct agent more powerful with only access to `bash` action space, CodeAct agent leverages OpenHands's plugin system:
- [Jupyter plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/jupyter): for IPython execution via bash command
- [Agent Skills plugin](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/runtime/plugins/agent_skills): Powerful bash command line tools for software development tasks introduced by [swe-agent](https://github.com/princeton-nlp/swe-agent).
-
-## Demo
-
-https://github.com/All-Hands-AI/OpenHands/assets/38853559/f592a192-e86c-4f48-ad31-d69282d5f6ac
-
-*Example of CodeActAgent with `gpt-4-turbo-2024-04-09` performing a data science task (linear regression)*
-
-## Work-in-progress & Next step
-
-[] Support web-browsing
-[] Complete the workflow for CodeAct agent to submit Github PRs
--- a/agenthub/codeact_agent/action_parser.py
+++ b/agenthub/codeact_agent/action_parser.py
@@ -40,6 +40,10 @@ class CodeActResponseParser(ResponseParser):
        if action is None:
            return ''
        for lang in ['bash', 'ipython', 'browse']:
+            # Special handling for DeepSeek: it has a stop-word bug and returns e.g. </execute_ipython instead of </execute_ipython>
+            if f'</execute_{lang}' in action and f'</execute_{lang}>' not in action:
+                action = action.replace(f'</execute_{lang}', f'</execute_{lang}>')
+
            if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
                action += f'</execute_{lang}>'
        return action
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@@ -5,8 +5,6 @@ from agenthub.codeact_agent.action_parser import CodeActResponseParser
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
-from openhands.core.exceptions import OperationCancelled
-from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.events.action import (
    Action,
@@ -206,22 +204,7 @@ class CodeActAgent(Agent):
            ],
        }

-        if self.llm.is_caching_prompt_active():
-            params['extra_headers'] = {
-                'anthropic-beta': 'prompt-caching-2024-07-31',
-            }
-
-        # TODO: move exception handling to agent_controller
-        try:
-            response = self.llm.completion(**params)
-        except OperationCancelled as e:
-            raise e
-        except Exception as e:
-            logger.error(f'{e}')
-            error_message = '{}: {}'.format(type(e).__name__, str(e).split('\n')[0])
-            return AgentFinishAction(
-                thought=f'Agent encountered an error while processing the last action.\nError: {error_message}\nPlease try again.'
-            )
+        response = self.llm.completion(**params)

        return self.action_parser.parse(response)

--- a/agenthub/codeact_swe_agent/codeact_swe_agent.py
+++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -166,7 +166,6 @@ class CodeActSWEAgent(Agent):
                '</execute_ipython>',
                '</execute_bash>',
            ],
-            temperature=0.0,
        )

        return self.response_parser.parse(response)
--- a/agenthub/micro/agent.py
+++ b/agenthub/micro/agent.py
@@ -78,7 +78,6 @@ class MicroAgent(Agent):
        message = Message(role='user', content=content)
        resp = self.llm.completion(
            messages=self.llm.format_messages_for_llm(message),
-            temperature=0.0,
        )
        action_resp = resp['choices'][0]['message']['content']
        action = parse_response(action_resp)
--- a/config.template.toml
+++ b/config.template.toml
@@ -112,7 +112,7 @@ api_key = "your-api-key"
 #embedding_deployment_name = ""

 # Embedding model to use
-embedding_model = ""
+embedding_model = "local"

 # Maximum number of characters in an observation's content
 #max_message_chars = 10000
@@ -146,8 +146,8 @@ model = "gpt-4o"
 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false

-# Using the prompt caching feature provided by the LLM
-#caching_prompt = false
+# Using the prompt caching feature if provided by the LLM and supported
+#caching_prompt = true

 # Base URL for the OLLAMA API
 #ollama_base_url = ""
@@ -188,7 +188,7 @@ model = "gpt-4o-mini"
 #memory_max_threads = 2

 # LLM config group to use
-#llm_config = 'llm'
+#llm_config = 'your-llm-config-group'

 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
@@ -232,7 +232,7 @@ llm_config = 'gpt3'
 [security]

 # Enable confirmation mode
-#confirmation_mode = true
+#confirmation_mode = false

 # The security analyzer to use
 #security_analyzer = ""
--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -37,7 +37,7 @@ ARG OPENHANDS_BUILD_VERSION #re-declare for this section
 ENV RUN_AS_OPENHANDS=true
 # A random number--we need this to be different from the user's UID on the host machine
 ENV OPENHANDS_USER_ID=42420
-ENV SANDBOX_API_HOSTNAME=host.docker.internal
+ENV SANDBOX_LOCAL_RUNTIME_URL=http://host.docker.internal
 ENV USE_HOST_NETWORK=false
 ENV WORKSPACE_BASE=/opt/workspace_base
 ENV OPENHANDS_BUILD_VERSION=$OPENHANDS_BUILD_VERSION
@@ -70,10 +70,11 @@ RUN playwright install --with-deps chromium
 COPY --chown=openhands:app --chmod=770 ./openhands ./openhands
 COPY --chown=openhands:app --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
 COPY --chown=openhands:app --chmod=770 ./agenthub ./agenthub
-COPY --chown=openhands:app --chmod=770 ./pyproject.toml ./pyproject.toml
-COPY --chown=openhands:app --chmod=770 ./poetry.lock ./poetry.lock
-COPY --chown=openhands:app --chmod=770 ./README.md ./README.md
-COPY --chown=openhands:app --chmod=770 ./MANIFEST.in ./MANIFEST.in
+COPY --chown=openhands:app ./pyproject.toml ./pyproject.toml
+COPY --chown=openhands:app ./poetry.lock ./poetry.lock
+COPY --chown=openhands:app ./README.md ./README.md
+COPY --chown=openhands:app ./MANIFEST.in ./MANIFEST.in
+COPY --chown=openhands:app ./LICENSE ./LICENSE

 # This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
 RUN python openhands/core/download.py # No-op to download assets
--- a/containers/build.sh
+++ b/containers/build.sh
@@ -1,13 +1,40 @@
 #!/bin/bash
 set -eo pipefail

-image_name=$1
-org_name=$2
+# Initialize variables with default values
+image_name=""
+org_name=""
 push=0
-if [[ $3 == "--push" ]]; then
-  push=1
+load=0
+tag_suffix=""
+
+# Function to display usage information
+usage() {
+    echo "Usage: $0 -i <image_name> [-o <org_name>] [--push] [--load] [-t <tag_suffix>]"
+    echo "  -i: Image name (required)"
+    echo "  -o: Organization name"
+    echo "  --push: Push the image"
+    echo "  --load: Load the image"
+    echo "  -t: Tag suffix"
+    exit 1
+}
+
+# Parse command-line options
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -i) image_name="$2"; shift 2 ;;
+        -o) org_name="$2"; shift 2 ;;
+        --push) push=1; shift ;;
+        --load) load=1; shift ;;
+        -t) tag_suffix="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+# Check if required arguments are provided
+if [[ -z "$image_name" ]]; then
+    echo "Error: Image name is required."
+    usage
 fi
-tag_suffix=$4

 echo "Building: $image_name"
 tags=()
@@ -95,14 +122,35 @@ if [[ $push -eq 1 ]]; then
  args+=" --cache-to=type=registry,ref=$DOCKER_REPOSITORY:$cache_tag,mode=max"
 fi

+if [[ $load -eq 1 ]]; then
+  args+=" --load"
+fi
+
 echo "Args: $args"

+# Modify the platform selection based on --load flag
+if [[ $load -eq 1 ]]; then
+  # When loading, build only for the current platform
+  platform=$(docker version -f '{{.Server.Os}}/{{.Server.Arch}}')
+else
+  # For push or without load, build for multiple platforms
+  platform="linux/amd64,linux/arm64"
+fi
+
+echo "Building for platform(s): $platform"
+
 docker buildx build \
  $args \
  --build-arg OPENHANDS_BUILD_VERSION="$OPENHANDS_BUILD_VERSION" \
  --cache-from=type=registry,ref=$DOCKER_REPOSITORY:$cache_tag \
  --cache-from=type=registry,ref=$DOCKER_REPOSITORY:$cache_tag_base-main \
-  --platform linux/amd64,linux/arm64 \
+  --platform $platform \
  --provenance=false \
  -f "$dir/Dockerfile" \
  "$DOCKER_BASE_DIR"
+
+# If load was requested, print the loaded images
+if [[ $load -eq 1 ]]; then
+  echo "Local images built:"
+  docker images "$DOCKER_REPOSITORY" --format "{{.Repository}}:{{.Tag}}"
+fi
--- a/containers/sandbox/Dockerfile
+++ b/containers/sandbox/Dockerfile
@@ -1,44 +0,0 @@
-FROM ubuntu:22.04
-
-# install basic packages
-RUN apt-get update && apt-get install -y \
-    curl \
-    wget \
-    git \
-    vim \
-    nano \
-    unzip \
-    zip \
-    python3 \
-    python3-pip \
-    python3-venv \
-    python3-dev \
-    build-essential \
-    openssh-server \
-    sudo \
-    gcc \
-    jq \
-    g++ \
-    make \
-    iproute2 \
-    && rm -rf /var/lib/apt/lists/*
-
-RUN mkdir -p -m0755 /var/run/sshd
-
-# symlink python3 to python
-RUN ln -s /usr/bin/python3 /usr/bin/python
-
-# ==== OpenHands Runtime Client ====
-RUN mkdir -p /openhands && mkdir -p /openhands/logs && chmod 777 /openhands/logs
-RUN wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-RUN bash Miniforge3.sh -b -p /openhands/miniforge3
-RUN chmod -R g+w /openhands/miniforge3
-RUN bash -c ". /openhands/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"
-RUN echo "" > /openhands/bash.bashrc
-RUN rm -f Miniforge3.sh
-
-# - agentskills dependencies
-RUN /openhands/miniforge3/bin/pip install --upgrade pip
-RUN /openhands/miniforge3/bin/pip install jupyterlab notebook jupyter_kernel_gateway flake8
-RUN /openhands/miniforge3/bin/pip install python-docx PyPDF2 python-pptx pylatexenc openai
-RUN /openhands/miniforge3/bin/pip install python-dotenv toml termcolor pydantic python-docx pyyaml docker pexpect tenacity e2b browsergym minio
--- a/containers/sandbox/config.sh
+++ b/containers/sandbox/config.sh
@@ -1,4 +0,0 @@
-DOCKER_REGISTRY=ghcr.io
-DOCKER_ORG=all-hands-ai
-DOCKER_IMAGE=sandbox
-DOCKER_BASE_DIR="."
--- a/docs/modules/usage/getting-started.mdx
+++ b/docs/modules/usage/getting-started.mdx
@@ -18,6 +18,8 @@ existing code that you'd like to modify.
 ```bash
 export WORKSPACE_BASE=$(pwd)/workspace

+docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
+
 docker run -it --pull=always \
    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
@@ -54,7 +56,7 @@ The `Advanced Options` also allow you to specify a `Base URL` if required.

 ## Versions

-The command above pulls the `0.9` tag, which represents the most recent stable release of OpenHands. You have other options as well:
+The command above pulls the most recent stable release of OpenHands. You have other options as well:
 - For a specific release, use `ghcr.io/all-hands-ai/openhands:$VERSION`, replacing $VERSION with the version number.
 - We use semver, and release major, minor, and patch tags. So `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
 - For the most up-to-date development version, you can use `ghcr.io/all-hands-ai/openhands:main`. This version is unstable and is recommended for testing or development purposes only.
--- a/docs/modules/usage/how-to/debugging.md
+++ b/docs/modules/usage/how-to/debugging.md
@@ -0,0 +1,71 @@
+# Debugging
+
+The following is intended as a primer on debugging OpenHands for development purposes.
+
+## Server / VSCode
+
+The following `launch.json` will allow debugging the agent, controller and server elements, but not the sandbox (which runs inside Docker). It will ignore any changes inside the `workspace/` directory:
+
+```
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "OpenHands CLI",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "openhands.core.cli",
+            "justMyCode": false
+        },
+        {
+            "name": "OpenHands WebApp",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "uvicorn",
+            "args": [
+                "openhands.server.listen:app",
+                "--reload",
+                "--reload-exclude",
+                "${workspaceFolder}/workspace",
+                "--port",
+                "3000"
+            ],
+            "justMyCode": false
+        }
+    ]
+}
+```
+
+More specific debugging configurations which include more parameters may be specified:
+
+```
+    ...
+    {
+      "name": "Debug CodeAct",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "openhands.core.main",
+      "args": [
+        "-t",
+        "Ask me what your task is.",
+        "-d",
+        "${workspaceFolder}/workspace",
+        "-c",
+        "CodeActAgent",
+        "-l",
+        "llm.o1",
+        "-n",
+        "prompts"
+      ],
+      "justMyCode": false
+    }
+    ...
+```
+
+Values in the snippet above can be updated such that:
+
+    * *t*: the task
+    * *d*: the openhands workspace directory
+    * *c*: the agent
+    * *l*: the LLM config (pre-defined in config.toml)
+    * *n*: session name (e.g. eventstream name)
--- a/docs/modules/usage/how-to/openshift-example.md
+++ b/docs/modules/usage/how-to/openshift-example.md
@@ -177,6 +177,7 @@ spec:
      claimName: docker-pvc
 ```

+
 ```bash
 # create the pod
 $ oc create -f pod.yaml
@@ -262,3 +263,167 @@ Events:                   <none>
 6. Connect to OpenHands UI, configure the Agent, then test:

 ![image](https://github.com/user-attachments/assets/12f94804-a0c7-4744-b873-e003c9caf40e)
+
+
+
+## GCP GKE OpenHands deployment
+
+**Warning**: this deployment grants the OpenHands application access to the Kubernetes docker socket, which creates a security risk. Use at your own discretion.
+1- Create policy for privileged access
+2- Create GKE credentials (optional)
+3- Create OpenHands deployment
+4- Verification and UI access commands
+5- Troubleshoot pod to verify the internal container
+
+1. Create policy for privileged access
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: privileged-role
+rules:
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["create", "get", "list", "watch", "delete"]
+- apiGroups: ["apps"]
+  resources: ["deployments"]
+  verbs: ["create", "get", "list", "watch", "delete"]
+- apiGroups: [""]
+  resources: ["pods/exec"]
+  verbs: ["create"]
+- apiGroups: [""]
+  resources: ["pods/log"]
+  verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: privileged-role-binding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: privileged-role
+subjects:
+- kind: ServiceAccount
+  name: default  # Change to your service account name
+  namespace: default
+```
+2. Create GKE credentials (optional)
+```bash
+kubectl create secret generic google-cloud-key \
+  --from-file=key.json=/path/to/your/google-cloud-key.json
+  ```
+3. Create OpenHands deployment
+## This is tested on a single worker node; if you have multiple nodes, specify the flag for the single worker
+
+```yaml
+kind: Deployment
+metadata:
+  name: openhands-app-2024
+  labels:
+    app: openhands-app-2024
+spec:
+  replicas: 1  # You can increase this number for multiple replicas
+  selector:
+    matchLabels:
+      app: openhands-app-2024
+  template:
+    metadata:
+      labels:
+        app: openhands-app-2024
+    spec:
+      containers:
+      - name: openhands-app-2024
+        image: ghcr.io/all-hands-ai/openhands:main
+        env:
+        - name: SANDBOX_USER_ID
+          value: "1000"
+        - name: SANDBOX_API_HOSTNAME
+          value: '10.164.0.4'
+        - name: WORKSPACE_MOUNT_PATH
+          value: "/tmp/workspace_base"
+        - name: GOOGLE_APPLICATION_CREDENTIALS
+          value: "/tmp/workspace_base/google-cloud-key.json"
+        volumeMounts:
+        - name: workspace-volume
+          mountPath: /tmp/workspace_base
+        - name: docker-sock
+          mountPath: /var/run/docker.sock
+        - name: google-credentials
+          mountPath: "/tmp/workspace_base/google-cloud-key.json"
+        securityContext:
+          privileged: true  # Add this to allow privileged access
+        ports:
+        - containerPort: 3000
+      - name: openhands-sandbox-2024
+        image: ghcr.io/opendevin/sandbox:main
+    #    securityContext:
+    #      privileged: true  # Add this to allow privileged access
+        ports:
+        - containerPort: 51963
+        command: ["/usr/sbin/sshd", "-D", "-p 51963", "-o", "PermitRootLogin=yes"]
+      volumes:
+      #- name: workspace-volume
+      #  persistentVolumeClaim:
+      #    claimName: workspace-pvc
+      - name: workspace-volume
+        emptyDir: {}
+      - name: docker-sock
+        hostPath:
+          path: /var/run/docker.sock       # Use host's Docker socket
+          type: Socket
+      - name: google-credentials
+        secret:
+          secretName: google-cloud-key
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: openhands-app-2024-svc
+spec:
+  selector:
+    app: openhands-app-2024
+  ports:
+  - name: http
+    protocol: TCP
+    port: 80
+    targetPort: 3000
+  - name: ssh
+    protocol: TCP
+    port: 51963
+    targetPort: 51963
+  type: LoadBalancer
+  ```
+
+5. Troubleshoot pod to verify the internal container
+### If you want to know more about the internal container runtime, use the pod deployment below, then use `kubectl exec -it` to enter the container; you can check the container runtime using normal docker commands like "docker ps -a"
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docker-in-docker
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docker-in-docker
+  template:
+    metadata:
+      labels:
+        app: docker-in-docker
+    spec:
+      containers:
+      - name: dind
+        image: docker:20.10-dind
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: docker-sock
+          mountPath: /var/run/docker.sock
+      volumes:
+      - name: docker-sock
+        hostPath:
+          path: /var/run/docker.sock
+          type: Socket
+```
--- a/docs/modules/usage/llms/llms.md
+++ b/docs/modules/usage/llms/llms.md
@@ -59,9 +59,21 @@ We have a few guides for running OpenHands with specific model providers:

 ### API retries and rate limits

-Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or API connection error.
-You can set the following environment variables to control the number of retries and the time between retries:
+LLM providers typically have rate limits, sometimes very low, and may require retries. OpenHands will automatically retry requests if it receives a Rate Limit Error (429 error code), API connection error, or other transient errors.
+
+You can customize these options as you need for the provider you're using. Check their documentation, and set the following environment variables to control the number of retries and the time between retries:

 * `LLM_NUM_RETRIES` (Default of 8)
 * `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
 * `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
+* `LLM_RETRY_MULTIPLIER` (Default of 2)
+
+If you are running `openhands` in development mode, you can also set these options to the values you need in the `config.toml` file:
+
+```toml
+[llm]
+num_retries = 8
+retry_min_wait = 15
+retry_max_wait = 120
+retry_multiplier = 2
+```
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -8,6 +8,11 @@ const sidebars: SidebarsConfig = {
      label: 'Getting Started',
      id: 'usage/getting-started',
    },
+    {
+      type: 'doc',
+      label: 'Configuration',
+      id: 'src/configuration',
+    },
    {
      type: 'category',
      label: 'LLMs',
@@ -83,6 +88,10 @@ const sidebars: SidebarsConfig = {
        {
          type: 'doc',
          id: 'usage/how-to/openshift-example',
+        },
+        {
+          type: 'doc',
+          id: 'usage/how-to/debugging',
        }
      ]
    },
--- a/docs/src/configuration.md
+++ b/docs/src/configuration.md
@@ -0,0 +1,41 @@
+# OpenHands Configuration Options
+
+OpenHands provides various configuration options to customize its behavior. This page documents all available options.
+
+## General Configuration
+
+- `project_name`: The name of your project.
+- `output_dir`: The directory where output files will be saved.
+- `max_iterations`: The maximum number of iterations for the AI to attempt solving a task.
+- `max_time`: The maximum time (in seconds) for the AI to work on a task.
+
+## AI Model Configuration
+
+- `model`: The AI model to use (e.g., "gpt-4", "gpt-3.5-turbo").
+- `temperature`: Controls the randomness of the AI's output (0.0 to 1.0).
+- `max_tokens`: The maximum number of tokens to generate in the AI's response.
+
+## Execution Environment
+
+- `python_path`: The path to the Python interpreter to use.
+- `allowed_modules`: A list of Python modules that are allowed to be imported.
+- `timeout`: The maximum execution time for a single command (in seconds).
+
+## Logging and Debugging
+
+- `log_level`: The level of logging detail (e.g., "DEBUG", "INFO", "WARNING", "ERROR").
+- `log_file`: The file path for saving logs.
+- `debug_mode`: Enable or disable debug mode (true/false).
+
+## Security
+
+- `allow_internet_access`: Allow the AI to access the internet (true/false).
+- `allowed_domains`: A list of allowed domains if internet access is enabled.
+- `max_file_size`: The maximum size (in bytes) of files that can be created or modified.
+
+## Custom Behavior
+
+- `custom_prompts`: A dictionary of custom prompts to use for specific tasks.
+- `task_specific_settings`: A dictionary of settings that apply to specific tasks or modules.
+
+Please refer to the OpenHands documentation for more detailed information on how to use these configuration options in your project.
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -69,7 +69,7 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!

 ```bash
 # ./evaluation/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" \
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
 ./evaluation/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300 30 16 "princeton-nlp/SWE-bench_Lite" test
 # This example runs evaluation on CodeActAgent for 300 instances on "princeton-nlp/SWE-bench_Lite"'s test set, with max 30 iteration per instances, with 16 number of workers running in parallel
 ```
@@ -163,7 +163,8 @@ This is in limited beta. Contact Xingyao over slack if you want to try this out!

 ```bash
 # ./evaluation/swe_bench/scripts/eval_infer_remote.sh [output.jsonl filepath] [num_workers]
-ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
+ALLHANDS_API_KEY="YOUR-API-KEY" RUNTIME=remote SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev" EVAL_DOCKER_IMAGE_PREFIX="us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images" \
+evaluation/swe_bench/scripts/eval_infer_remote.sh evaluation/evaluation_outputs/outputs/swe_bench_lite/CodeActAgent/Llama-3.1-70B-Instruct-Turbo_maxiter_30_N_v1.9-no-hint/output.jsonl 16 "princeton-nlp/SWE-bench_Lite" "test"
 # This example evaluate patches generated by CodeActAgent on Llama-3.1-70B-Instruct-Turbo on "princeton-nlp/SWE-bench_Lite"'s test set, with 16 number of workers running in parallel
 ```

--- a/evaluation/swe_bench/eval_infer.py
+++ b/evaluation/swe_bench/eval_infer.py
@@ -81,6 +81,7 @@ def get_config(instance: pd.Series) -> AppConfig:
            # large enough timeout, since some testcases take very long to run
            timeout=1800,
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -131,6 +131,7 @@ def get_config(
            # large enough timeout, since some testcases take very long to run
            timeout=300,
            api_key=os.environ.get('ALLHANDS_API_KEY', None),
+            remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
        ),
        # do not mount workspace
        workspace_base=None,
--- a/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh
+++ b/evaluation/swe_bench/scripts/cleanup_remote_runtime.sh
@@ -2,10 +2,10 @@


 # API base URL
-BASE_URL="https://api.all-hands.dev/v0"
+BASE_URL="https://runtime.eval.all-hands.dev"

 # Get the list of runtimes
-response=$(curl --silent --location --request GET "${BASE_URL}/runtime/list" \
+response=$(curl --silent --location --request GET "${BASE_URL}/list" \
  --header "X-API-Key: ${ALLHANDS_API_KEY}")

 n_runtimes=$(echo $response | jq -r '.total')
@@ -16,7 +16,7 @@ runtime_ids=$(echo $response | jq -r '.runtimes | .[].runtime_id')
 counter=1
 for runtime_id in $runtime_ids; do
  echo "Stopping runtime ${counter}/${n_runtimes}: ${runtime_id}"
-  curl --silent --location --request POST "${BASE_URL}/runtime/stop" \
+  curl --silent --location --request POST "${BASE_URL}/stop" \
    --header "X-API-Key: ${ALLHANDS_API_KEY}" \
    --header "Content-Type: application/json" \
    --data-raw "{\"runtime_id\": \"${runtime_id}\"}"
--- a/evaluation/swe_bench/scripts/eval_infer.sh
+++ b/evaluation/swe_bench/scripts/eval_infer.sh
@@ -106,7 +106,7 @@ if [ -z "$INSTANCE_ID" ]; then
        rm -rf $RESULT_OUTPUT_DIR/eval_outputs
    fi

-    mv run_instance_logs/$RUN_ID/$MODEL_NAME_OR_PATH $RESULT_OUTPUT_DIR
+    mv logs/run_evaluation/$RUN_ID/$MODEL_NAME_OR_PATH $RESULT_OUTPUT_DIR
    mv $RESULT_OUTPUT_DIR/$MODEL_NAME_OR_PATH $RESULT_OUTPUT_DIR/eval_outputs
    echo "RUN_ID: $RUN_ID" > $RESULT_OUTPUT_DIR/run_id.txt

--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -42,6 +42,7 @@ class EvalMetadata(BaseModel):
        dumped_dict = json.loads(dumped)
        # avoid leaking sensitive information
        dumped_dict['llm_config'] = self.llm_config.to_safe_dict()
+        logger.debug(f'Dumped metadata: {dumped_dict}')
        return json.dumps(dumped_dict)


@@ -374,18 +375,27 @@ def reset_logger_for_multiprocessing(
    # Remove all existing handlers from logger
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
-    # add back the console handler to print ONE line
-    logger.addHandler(get_console_handler())
+
+    # add console handler to print ONE line
+    console_handler = get_console_handler(log_level=logging.INFO)
+    console_handler.setFormatter(
+        logging.Formatter(
+            f'Instance {instance_id} - ' + '%(asctime)s - %(levelname)s - %(message)s'
+        )
+    )
+    logger.addHandler(console_handler)
    logger.info(
        f'Starting evaluation for instance {instance_id}.\n'
        f'Hint: run "tail -f {log_file}" to see live logs in a separate shell'
    )
-    # Remove all existing handlers from logger
-    for handler in logger.handlers[:]:
-        logger.removeHandler(handler)
+    # Only log WARNING or higher to console
+    console_handler.setLevel(logging.WARNING)
+
+    # Log INFO and above to file
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
+    file_handler.setLevel(logging.INFO)
    logger.addHandler(file_handler)
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "openhands-frontend",
-  "version": "0.9.4",
+  "version": "0.9.7",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "openhands-frontend",
-      "version": "0.9.4",
+      "version": "0.9.7",
      "dependencies": {
        "@monaco-editor/react": "^4.6.0",
        "@nextui-org/react": "^2.4.8",
@@ -33,7 +33,7 @@
        "react-syntax-highlighter": "^15.5.0",
        "remark-gfm": "^4.0.0",
        "tailwind-merge": "^2.5.2",
-        "vite": "^5.4.7",
+        "vite": "^5.4.8",
        "web-vitals": "^3.5.2"
      },
      "devDependencies": {
@@ -41,8 +41,8 @@
        "@testing-library/jest-dom": "^6.5.0",
        "@testing-library/react": "^16.0.1",
        "@testing-library/user-event": "^14.5.2",
-        "@types/node": "^22.6.1",
-        "@types/react": "^18.3.8",
+        "@types/node": "^22.7.3",
+        "@types/react": "^18.3.9",
        "@types/react-dom": "^18.3.0",
        "@types/react-highlight": "^0.12.8",
        "@types/react-syntax-highlighter": "^15.5.13",
@@ -60,11 +60,11 @@
        "eslint-plugin-react": "^7.35.0",
        "eslint-plugin-react-hooks": "^4.6.2",
        "husky": "^9.1.6",
-        "jsdom": "^25.0.0",
+        "jsdom": "^25.0.1",
        "lint-staged": "^15.2.10",
        "postcss": "^8.4.47",
        "prettier": "^3.3.3",
-        "tailwindcss": "^3.4.12",
+        "tailwindcss": "^3.4.13",
        "typescript": "^5.6.2",
        "vite-tsconfig-paths": "^5.0.1",
        "vitest": "^1.6.0"
@@ -4860,9 +4860,9 @@
      "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
    },
    "node_modules/@types/node": {
-      "version": "22.6.1",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-22.6.1.tgz",
-      "integrity": "sha512-V48tCfcKb/e6cVUigLAaJDAILdMP0fUW6BidkPK4GpGjXcfbnoHasCZDwz3N3yVt5we2RHm4XTQCpv0KJz9zqw==",
+      "version": "22.7.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.3.tgz",
+      "integrity": "sha512-qXKfhXXqGTyBskvWEzJZPUxSslAiLaB6JGP1ic/XTH9ctGgzdgYguuLP1C601aRTSDNlLb0jbKqXjZ48GNraSA==",
      "devOptional": true,
      "dependencies": {
        "undici-types": "~6.19.2"
@@ -4874,9 +4874,9 @@
      "integrity": "sha512-5zvhXYtRNRluoE/jAp4GVsSduVUzNWKkOZrCDBWYtE7biZywwdC2AcEzg+cSMLFRfVgeAFqpfNabiPjxFddV1Q=="
    },
    "node_modules/@types/react": {
-      "version": "18.3.8",
-      "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.8.tgz",
-      "integrity": "sha512-syBUrW3/XpnW4WJ41Pft+I+aPoDVbrBVQGEnbD7NijDGlVC+8gV/XKRY+7vMDlfPpbwYt0l1vd/Sj8bJGMbs9Q==",
+      "version": "18.3.9",
+      "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.9.tgz",
+      "integrity": "sha512-+BpAVyTpJkNWWSSnaLBk6ePpHLOGJKnEQNbINNovPWzvEUyAe3e+/d494QdEh71RekM/qV7lw6jzf1HGrJyAtQ==",
      "dependencies": {
        "@types/prop-types": "*",
        "csstype": "^3.0.2"
@@ -6160,23 +6160,17 @@
      }
    },
    "node_modules/cssstyle": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.0.1.tgz",
-      "integrity": "sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==",
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.1.0.tgz",
+      "integrity": "sha512-h66W1URKpBS5YMI/V8PyXvTMFT8SupJ1IzoIV8IeBC/ji8WVmrO8dGlTi+2dh6whmdk6BiKJLD/ZBkhWbcg6nA==",
      "dev": true,
      "dependencies": {
-        "rrweb-cssom": "^0.6.0"
+        "rrweb-cssom": "^0.7.1"
      },
      "engines": {
        "node": ">=18"
      }
    },
-    "node_modules/cssstyle/node_modules/rrweb-cssom": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz",
-      "integrity": "sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==",
-      "dev": true
-    },
    "node_modules/csstype": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
@@ -8872,12 +8866,12 @@
      }
    },
    "node_modules/jsdom": {
-      "version": "25.0.0",
-      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-25.0.0.tgz",
-      "integrity": "sha512-OhoFVT59T7aEq75TVw9xxEfkXgacpqAhQaYgP9y/fDqWQCMB/b1H66RfmPm/MaeaAIU9nDwMOVTlPN51+ao6CQ==",
+      "version": "25.0.1",
+      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-25.0.1.tgz",
+      "integrity": "sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==",
      "dev": true,
      "dependencies": {
-        "cssstyle": "^4.0.1",
+        "cssstyle": "^4.1.0",
        "data-urls": "^5.0.0",
        "decimal.js": "^10.4.3",
        "form-data": "^4.0.0",
@@ -8890,7 +8884,7 @@
        "rrweb-cssom": "^0.7.1",
        "saxes": "^6.0.0",
        "symbol-tree": "^3.2.4",
-        "tough-cookie": "^4.1.4",
+        "tough-cookie": "^5.0.0",
        "w3c-xmlserializer": "^5.0.0",
        "webidl-conversions": "^7.0.0",
        "whatwg-encoding": "^3.1.1",
@@ -11061,12 +11055,6 @@
        "url": "https://github.com/sponsors/wooorm"
      }
    },
-    "node_modules/psl": {
-      "version": "1.9.0",
-      "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
-      "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==",
-      "dev": true
-    },
    "node_modules/punycode": {
      "version": "2.3.1",
      "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
@@ -11076,12 +11064,6 @@
        "node": ">=6"
      }
    },
-    "node_modules/querystringify": {
-      "version": "2.2.0",
-      "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
-      "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==",
-      "dev": true
-    },
    "node_modules/queue-microtask": {
      "version": "1.2.3",
      "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -11593,12 +11575,6 @@
        "url": "https://opencollective.com/unified"
      }
    },
-    "node_modules/requires-port": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
-      "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==",
-      "dev": true
-    },
    "node_modules/reselect": {
      "version": "5.1.1",
      "resolved": "https://registry.npmjs.org/reselect/-/reselect-5.1.1.tgz",
@@ -12426,9 +12402,9 @@
      }
    },
    "node_modules/tailwindcss": {
-      "version": "3.4.12",
-      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.12.tgz",
-      "integrity": "sha512-Htf/gHj2+soPb9UayUNci/Ja3d8pTmu9ONTfh4QY8r3MATTZOzmv6UYWF7ZwikEIC8okpfqmGqrmDehua8mF8w==",
+      "version": "3.4.13",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.13.tgz",
+      "integrity": "sha512-KqjHOJKogOUt5Bs752ykCeiwvi0fKVkr5oqsFNt/8px/tA8scFPIlkygsf6jXrfCqGHz7VflA6+yytWuM+XhFw==",
      "dependencies": {
        "@alloc/quick-lru": "^5.2.0",
        "arg": "^5.0.2",
@@ -12566,6 +12542,24 @@
        "node": ">=14.0.0"
      }
    },
+    "node_modules/tldts": {
+      "version": "6.1.47",
+      "resolved": "https://registry.npmjs.org/tldts/-/tldts-6.1.47.tgz",
+      "integrity": "sha512-R/K2tZ5MiY+mVrnSkNJkwqYT2vUv1lcT6wJvd2emGaMJ7PHUGRY4e3tUsdFCXgqxi2QgbHjL3yJgXCo40v9Hxw==",
+      "dev": true,
+      "dependencies": {
+        "tldts-core": "^6.1.47"
+      },
+      "bin": {
+        "tldts": "bin/cli.js"
+      }
+    },
+    "node_modules/tldts-core": {
+      "version": "6.1.47",
+      "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-6.1.47.tgz",
+      "integrity": "sha512-6SWyFMnlst1fEt7GQVAAu16EGgFK0cLouH/2Mk6Ftlwhv3Ol40L0dlpGMcnnNiiOMyD2EV/aF3S+U2nKvvLvrA==",
+      "dev": true
+    },
    "node_modules/to-fast-properties": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz",
@@ -12586,18 +12580,15 @@
      }
    },
    "node_modules/tough-cookie": {
-      "version": "4.1.4",
-      "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz",
-      "integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==",
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-5.0.0.tgz",
+      "integrity": "sha512-FRKsF7cz96xIIeMZ82ehjC3xW2E+O2+v11udrDYewUbszngYhsGa8z6YUMMzO9QJZzzyd0nGGXnML/TReX6W8Q==",
      "dev": true,
      "dependencies": {
-        "psl": "^1.1.33",
-        "punycode": "^2.1.1",
-        "universalify": "^0.2.0",
-        "url-parse": "^1.5.3"
+        "tldts": "^6.1.32"
      },
      "engines": {
-        "node": ">=6"
+        "node": ">=16"
      }
    },
    "node_modules/tr46": {
@@ -12936,15 +12927,6 @@
        "url": "https://opencollective.com/unified"
      }
    },
-    "node_modules/universalify": {
-      "version": "0.2.0",
-      "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz",
-      "integrity": "sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==",
-      "dev": true,
-      "engines": {
-        "node": ">= 4.0.0"
-      }
-    },
    "node_modules/update-browserslist-db": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz",
@@ -12983,16 +12965,6 @@
        "punycode": "^2.1.0"
      }
    },
-    "node_modules/url-parse": {
-      "version": "1.5.10",
-      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
-      "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
-      "dev": true,
-      "dependencies": {
-        "querystringify": "^2.1.1",
-        "requires-port": "^1.0.0"
-      }
-    },
    "node_modules/use-callback-ref": {
      "version": "1.3.2",
      "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz",
@@ -13112,9 +13084,9 @@
      }
    },
    "node_modules/vite": {
-      "version": "5.4.7",
-      "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.7.tgz",
-      "integrity": "sha512-5l2zxqMEPVENgvzTuBpHer2awaetimj2BGkhBPdnwKbPNOlHsODU+oiazEZzLK7KhAnOrO+XGYJYn4ZlUhDtDQ==",
+      "version": "5.4.8",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.8.tgz",
+      "integrity": "sha512-FqrItQ4DT1NC4zCUqMB4c4AZORMKIa0m8/URVCZ77OZ/QSNeJ54bU1vrFADbDsuwfIPcgknRkmqakQcgnL4GiQ==",
      "dependencies": {
        "esbuild": "^0.21.3",
        "postcss": "^8.4.43",
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
  "name": "openhands-frontend",
-  "version": "0.9.4",
+  "version": "0.9.7",
  "private": true,
  "type": "module",
  "engines": {
@@ -32,7 +32,7 @@
    "react-syntax-highlighter": "^15.5.0",
    "remark-gfm": "^4.0.0",
    "tailwind-merge": "^2.5.2",
-    "vite": "^5.4.7",
+    "vite": "^5.4.8",
    "web-vitals": "^3.5.2"
  },
  "scripts": {
@@ -64,8 +64,8 @@
    "@testing-library/jest-dom": "^6.5.0",
    "@testing-library/react": "^16.0.1",
    "@testing-library/user-event": "^14.5.2",
-    "@types/node": "^22.6.1",
-    "@types/react": "^18.3.8",
+    "@types/node": "^22.7.3",
+    "@types/react": "^18.3.9",
    "@types/react-dom": "^18.3.0",
    "@types/react-highlight": "^0.12.8",
    "@types/react-syntax-highlighter": "^15.5.13",
@@ -83,11 +83,11 @@
    "eslint-plugin-react": "^7.35.0",
    "eslint-plugin-react-hooks": "^4.6.2",
    "husky": "^9.1.6",
-    "jsdom": "^25.0.0",
+    "jsdom": "^25.0.1",
    "lint-staged": "^15.2.10",
    "postcss": "^8.4.47",
    "prettier": "^3.3.3",
-    "tailwindcss": "^3.4.12",
+    "tailwindcss": "^3.4.13",
    "typescript": "^5.6.2",
    "vite-tsconfig-paths": "^5.0.1",
    "vitest": "^1.6.0"
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -94,13 +94,13 @@ function AgentStatusBar() {
  const [statusMessage, setStatusMessage] = React.useState<string>("");

  React.useEffect(() => {
-    const trimmedCustomMessage = curStatusMessage.message.trim();
+    const trimmedCustomMessage = curStatusMessage.status.trim();
    if (trimmedCustomMessage) {
      setStatusMessage(t(trimmedCustomMessage));
    } else {
      setStatusMessage(AgentStatusMap[curAgentState].message);
    }
-  }, [curAgentState, curStatusMessage.message]);
+  }, [curAgentState, curStatusMessage.status]);

  return (
    <div className="flex flex-col items-center">
--- a/frontend/src/components/modals/settings/SettingsForm.tsx
+++ b/frontend/src/components/modals/settings/SettingsForm.tsx
@@ -7,6 +7,7 @@ import { I18nKey } from "../../../i18n/declaration";
 import { AutocompleteCombobox } from "./AutocompleteCombobox";
 import { Settings } from "#/services/settings";
 import { organizeModelsAndProviders } from "#/utils/organizeModelsAndProviders";
+import { extractModelAndProvider } from "#/utils/extractModelAndProvider";
 import { ModelSelector } from "./ModelSelector";

 interface SettingsFormProps {
@@ -41,17 +42,29 @@ function SettingsForm({
 }: SettingsFormProps) {
  const { t } = useTranslation();
  const { isOpen: isVisible, onOpenChange: onVisibleChange } = useDisclosure();
-  const advancedAlreadyInUse = React.useMemo(
-    () =>
+  const advancedAlreadyInUse = React.useMemo(() => {
+    const organizedModels = organizeModelsAndProviders(models);
+    const { provider, model } = extractModelAndProvider(
+      settings.LLM_MODEL || "",
+    );
+    const isKnownModel =
+      provider in organizedModels &&
+      organizedModels[provider].models.includes(model);
+
+    return (
      !!settings.SECURITY_ANALYZER ||
      !!settings.CONFIRMATION_MODE ||
      !!settings.LLM_BASE_URL ||
-      (!!settings.LLM_MODEL && !models.includes(settings.LLM_MODEL)),
-    [],
-  );
+      (!!settings.LLM_MODEL && !isKnownModel)
+    );
+  }, [settings, models]);
  const [enableAdvanced, setEnableAdvanced] =
    React.useState(advancedAlreadyInUse);

+  React.useEffect(() => {
+    setEnableAdvanced(advancedAlreadyInUse);
+  }, [advancedAlreadyInUse]);
+
  const handleAdvancedChange = (value: boolean) => {
    setEnableAdvanced(value);
  };
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -140,11 +140,11 @@ export function handleActionMessage(message: ActionMessage) {
 }

 export function handleStatusMessage(message: StatusMessage) {
-  const msg = message.message == null ? "" : message.message.trim();
+  const msg = message.status == null ? "" : message.status.trim();
  store.dispatch(
    setCurStatusMessage({
      ...message,
-      message: msg,
+      status: msg,
    }),
  );
 }
@@ -160,9 +160,9 @@ export function handleAssistantMessage(data: string | SocketMessage) {

  if ("action" in socketMessage) {
    handleActionMessage(socketMessage);
-  } else if ("observation" in socketMessage) {
-    handleObservationMessage(socketMessage);
-  } else if ("message" in socketMessage) {
+  } else if ("status" in socketMessage) {
    handleStatusMessage(socketMessage);
+  } else {
+    handleObservationMessage(socketMessage);
  }
 }
--- a/frontend/src/state/statusSlice.ts
+++ b/frontend/src/state/statusSlice.ts
@@ -2,7 +2,7 @@ import { createSlice, PayloadAction } from "@reduxjs/toolkit";
 import { StatusMessage } from "#/types/Message";

 const initialStatusMessage: StatusMessage = {
-  message: "",
+  status: "",
  is_error: false,
 };

--- a/frontend/src/types/Message.tsx
+++ b/frontend/src/types/Message.tsx
@@ -38,5 +38,5 @@ export interface StatusMessage {
  is_error: boolean;

  // A status message to display to the user
-  message: string;
+  status: string;
 }
--- a/openhands/init.py
+++ b/openhands/init.py
@@ -1,3 +1,6 @@
+import os
+
+
 def get_version():
    try:
        from importlib.metadata import PackageNotFoundError, version
@@ -19,6 +22,16 @@ def get_version():
    except ImportError:
        pass

+    # Try getting the version from pyproject.toml
+    try:
+        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        with open(os.path.join(root_dir, 'pyproject.toml'), 'r') as f:
+            for line in f:
+                if line.startswith('version ='):
+                    return line.split('=')[1].strip().strip('"')
+    except FileNotFoundError:
+        pass
+
    return 'unknown'


--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -1,4 +1,5 @@
 import asyncio
+import copy
 import traceback
 from typing import Type

@@ -36,6 +37,7 @@ from openhands.events.observation import (
    ErrorObservation,
    Observation,
 )
+from openhands.events.serialization.event import truncate_content
 from openhands.llm.llm import LLM
 from openhands.runtime.utils.shutdown_listener import should_continue

@@ -54,7 +56,7 @@ class AgentController:
    confirmation_mode: bool
    agent_to_llm_config: dict[str, LLMConfig]
    agent_configs: dict[str, AgentConfig]
-    agent_task: asyncio.Task | None = None
+    agent_task: asyncio.Future | None = None
    parent: 'AgentController | None' = None
    delegate: 'AgentController | None' = None
    _pending_action: Action | None = None
@@ -115,13 +117,8 @@ class AgentController:
        # stuck helper
        self._stuck_detector = StuckDetector(self.state)

-        if not is_delegate:
-            self.agent_task = asyncio.create_task(self._start_step_loop())
-
    async def close(self):
        """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream."""
-        if self.agent_task is not None:
-            self.agent_task.cancel()
        await self.set_agent_state_to(AgentState.STOPPED)
        self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER)

@@ -149,7 +146,7 @@ class AgentController:
            self.state.last_error += f': {exception}'
        self.event_stream.add_event(ErrorObservation(message), EventSource.AGENT)

-    async def _start_step_loop(self):
+    async def start_step_loop(self):
        """The main loop for the agent's step-by-step execution."""

        logger.info(f'[Agent Controller {self.id}] Starting step loop...')
@@ -223,7 +220,13 @@ class AgentController:
        ):
            return

-        logger.info(observation, extra={'msg_type': 'OBSERVATION'})
+        # Make sure we print the observation in the same way as the LLM sees it
+        observation_to_print = copy.deepcopy(observation)
+        if len(observation_to_print.content) > self.agent.llm.config.max_message_chars:
+            observation_to_print.content = truncate_content(
+                observation_to_print.content, self.agent.llm.config.max_message_chars
+            )
+        logger.info(observation_to_print, extra={'msg_type': 'OBSERVATION'})
        if self._pending_action and self._pending_action.id == observation.cause:
            self._pending_action = None
            if self.state.agent_state == AgentState.USER_CONFIRMED:
@@ -358,7 +361,7 @@ class AgentController:
            # global metrics should be shared between parent and child
            metrics=self.state.metrics,
        )
-        logger.debug(
+        logger.info(
            f'[Agent Controller {self.id}]: start delegate, creating agent {delegate_agent.name} using LLM {llm}'
        )
        self.delegate = AgentController(
@@ -387,15 +390,16 @@ class AgentController:

        if self.delegate is not None:
            assert self.delegate != self
+            if self.delegate.get_agent_state() == AgentState.PAUSED:
+                await asyncio.sleep(1)
+            else:
+                await self._delegate_step()
            return

-        if self.state.delegate_level == 0:
-            logger.info(f'{self.agent.name} STEP {self.state.iteration}')
-        else:
-            logger.info(
-                f'{self.agent.name} LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
-                extra={'msg_type': 'STEP'},
-            )
+        logger.info(
+            f'{self.agent.name} LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
+            extra={'msg_type': 'STEP'},
+        )

        # check if agent hit the resources limit
        stop_step = False
@@ -449,13 +453,12 @@ class AgentController:

    async def _delegate_step(self):
        """Executes a single step of the delegate agent."""
+        logger.debug(f'[Agent Controller {self.id}] Delegate not none, awaiting...')
        await self.delegate._step()  # type: ignore[union-attr]
-
-        # when the delegate step is done, check its state
+        logger.debug(f'[Agent Controller {self.id}] Delegate step done')
        assert self.delegate is not None
        delegate_state = self.delegate.get_agent_state()
-
-        # clean up if the delegate has finished, normally or abnormally
+        logger.debug(f'[Agent Controller {self.id}] Delegate state: {delegate_state}')
        if delegate_state == AgentState.ERROR:
            # update iteration that shall be shared across agents
            self.state.iteration = self.delegate.state.iteration
@@ -467,7 +470,7 @@ class AgentController:

            await self.report_error('Delegator agent encountered an error')
        elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
-            logger.debug(
+            logger.info(
                f'[Agent Controller {self.id}] Delegate agent has finished execution'
            )
            # retrieve delegate result
@@ -509,9 +512,7 @@ class AgentController:
        """
        stop_step = False
        if self.state.traffic_control_state == TrafficControlState.PAUSED:
-            logger.debug(
-                'Hitting traffic control, temporarily resume upon user request'
-            )
+            logger.info('Hitting traffic control, temporarily resume upon user request')
            self.state.traffic_control_state = TrafficControlState.NORMAL
        else:
            self.state.traffic_control_state = TrafficControlState.THROTTLING
--- a/openhands/controller/state/task.py
+++ b/openhands/controller/state/task.py
@@ -108,7 +108,7 @@ class Task:
            TaskInvalidStateError: If the provided state is invalid.
        """
        if state not in STATES:
-            logger.error(f'Invalid state: {state}')
+            logger.error('Invalid state: %s', state)
            raise TaskInvalidStateError(state)
        self.state = state
        if (
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -121,6 +121,9 @@ async def main():
        event_stream=event_stream,
    )

+    if controller is not None:
+        controller.agent_task = asyncio.create_task(controller.start_step_loop())
+
    async def prompt_for_next_task():
        next_message = input('How can I help? >> ')
        if next_message == 'exit':
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass, fields

 from openhands.core.config.config_utils import get_field_info
@@ -36,7 +37,7 @@ class LLMConfig:
        ollama_base_url: The base URL for the OLLAMA API.
        drop_params: Drop any unmapped (unsupported) params without causing an exception.
        disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction).
-        caching_prompt: Using the prompt caching feature provided by the LLM.
+        caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
        log_completions: Whether to log LLM completions to the state.
    """

@@ -68,7 +69,7 @@ class LLMConfig:
    ollama_base_url: str | None = None
    drop_params: bool = True
    disable_vision: bool | None = None
-    caching_prompt: bool = False
+    caching_prompt: bool = True
    log_completions: bool = False

    def defaults_to_dict(self) -> dict:
@@ -78,6 +79,18 @@ class LLMConfig:
            result[f.name] = get_field_info(f)
        return result

+    def __post_init__(self):
+        """
+        Post-initialization hook to assign OpenRouter-related variables to environment variables.
+        This ensures that these values are accessible to litellm at runtime.
+        """
+
+        # Assign OpenRouter-specific variables to environment variables
+        if self.openrouter_site_url:
+            os.environ['OR_SITE_URL'] = self.openrouter_site_url
+        if self.openrouter_app_name:
+            os.environ['OR_APP_NAME'] = self.openrouter_app_name
+
    def __str__(self):
        attr_str = []
        for f in fields(self):
@@ -101,9 +114,3 @@ class LLMConfig:
            if k in LLM_SENSITIVE_FIELDS:
                ret[k] = '******' if v else None
        return ret
-
-    def set_missing_attributes(self):
-        """Set any missing attributes to their default values."""
-        for field_name, field_obj in self.__dataclass_fields__.items():
-            if not hasattr(self, field_name):
-                setattr(self, field_name, field_obj.default)
--- a/openhands/core/config/sandbox_config.py
+++ b/openhands/core/config/sandbox_config.py
@@ -9,7 +9,8 @@ class SandboxConfig:
    """Configuration for the sandbox.

    Attributes:
-        api_hostname: The hostname for the EventStream Runtime API.
+        remote_runtime_api_url: The base URL of the Remote Runtime API.
+        local_runtime_url: The default base URL of the local runtime. You may want to change it to http://host.docker.internal for DIND environments.
        base_container_image: The base container image from which to build the runtime image.
        runtime_container_image: The runtime container image to use.
        user_id: The user ID for the sandbox.
@@ -30,7 +31,8 @@ class SandboxConfig:
            Default is None for general purpose browsing. Check evaluation/miniwob and evaluation/webarena for examples.
    """

-    api_hostname: str = 'localhost'
+    remote_runtime_api_url: str = 'http://localhost:8000'
+    local_runtime_url: str = 'http://localhost'
    api_key: str | None = None
    base_container_image: str = 'nikolaik/python-nodejs:python3.11-nodejs22'  # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
    runtime_container_image: str | None = None
--- a/openhands/core/logger.py
+++ b/openhands/core/logger.py
@@ -1,22 +1,13 @@
-import copy
-import glob
 import logging
 import os
 import re
 import sys
 import traceback
 from datetime import datetime
-from enum import Enum
 from typing import Literal, Mapping

 from termcolor import colored

-
-class LlmLogType(Enum):
-    PROMPT = 'prompt'
-    RESPONSE = 'response'
-
-
 LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO').upper()
 DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
 if DEBUG:
@@ -44,7 +35,6 @@ ColorType = Literal[
 ]

 LOG_COLORS: Mapping[str, ColorType] = {
-    'DEBUG': 'blue',
    'ACTION': 'green',
    'USER_ACTION': 'light_red',
    'OBSERVATION': 'yellow',
@@ -56,10 +46,8 @@ LOG_COLORS: Mapping[str, ColorType] = {


 class ColoredFormatter(logging.Formatter):
-    """Formatter for colored logging in console."""
-
-    def format(self, record: logging.LogRecord) -> str:
-        msg_type = record.__dict__.get('msg_type', 'INFO')
+    def format(self, record):
+        msg_type = record.__dict__.get('msg_type')
        event_source = record.__dict__.get('event_source')
        if event_source:
            new_msg_type = f'{event_source.upper()}_{msg_type}'
@@ -82,45 +70,21 @@ class ColoredFormatter(logging.Formatter):
        return super().format(record)


-class NoColorFormatter(logging.Formatter):
-    """Formatter for non-colored logging in files."""
-
-    def format(self, record: logging.LogRecord) -> str:
-        # Create a deep copy of the record to avoid modifying the original
-        new_record: logging.LogRecord = copy.deepcopy(record)
-        # Strip ANSI color codes from the message
-        new_record.msg = strip_ansi(new_record.msg)
-
-        return super().format(new_record)
-
-
-def strip_ansi(s: str) -> str:
-    """
-    Removes ANSI escape sequences from str, as defined by ECMA-048 in
-    http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-048.pdf
-    # https://github.com/ewen-lbh/python-strip-ansi/blob/master/strip_ansi/__init__.py
-    """
-    pattern = re.compile(r'\x1B\[\d+(;\d+){0,2}m')
-    stripped = pattern.sub('', s)
-    return stripped
-
-
-console_formatter: ColoredFormatter = ColoredFormatter(
+console_formatter = ColoredFormatter(
    '\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s',
    datefmt='%H:%M:%S',
 )

-file_formatter: NoColorFormatter = NoColorFormatter(
+file_formatter = logging.Formatter(
    '%(asctime)s - %(name)s:%(levelname)s: %(filename)s:%(lineno)s - %(message)s',
    datefmt='%H:%M:%S',
 )
-
-llm_formatter: logging.Formatter = logging.Formatter('%(message)s')
+llm_formatter = logging.Formatter('%(message)s')


 class SensitiveDataFilter(logging.Filter):
-    def filter(self, record: logging.LogRecord) -> bool:
-        # Start with attributes
+    def filter(self, record):
+        # start with attributes
        sensitive_patterns = [
            'api_key',
            'aws_access_key_id',
@@ -130,21 +94,17 @@ class SensitiveDataFilter(logging.Filter):
            'jwt_secret',
        ]

-        # Add env var names
+        # add env var names
        env_vars = [attr.upper() for attr in sensitive_patterns]
        sensitive_patterns.extend(env_vars)

-        # And some special cases
-        sensitive_patterns.extend(
-            [
-                'JWT_SECRET',
-                'LLM_API_KEY',
-                'GITHUB_TOKEN',
-                'SANDBOX_ENV_GITHUB_TOKEN',
-            ]
-        )
+        # and some special cases
+        sensitive_patterns.append('JWT_SECRET')
+        sensitive_patterns.append('LLM_API_KEY')
+        sensitive_patterns.append('GITHUB_TOKEN')
+        sensitive_patterns.append('SANDBOX_ENV_GITHUB_TOKEN')

-        # This also formats the message with % args
+        # this also formats the message with % args
        msg = record.getMessage()
        record.args = ()

@@ -152,29 +112,25 @@ class SensitiveDataFilter(logging.Filter):
            pattern = rf"{attr}='?([\w-]+)'?"
            msg = re.sub(pattern, f"{attr}='******'", msg)

-        # Passed with msg
+        # passed with msg
        record.msg = msg
        return True


-def get_console_handler(log_level: int = logging.INFO) -> logging.StreamHandler:
+def get_console_handler(log_level=logging.INFO):
    """Returns a console handler for logging."""
-    console_handler: logging.StreamHandler = logging.StreamHandler()
+    console_handler = logging.StreamHandler()
    console_handler.setLevel(log_level)
    console_handler.setFormatter(console_formatter)
    return console_handler


-def get_file_handler(
-    log_dir: str, log_level: int = logging.INFO
-) -> logging.FileHandler:
+def get_file_handler(log_dir, log_level=logging.INFO):
    """Returns a file handler for logging."""
    os.makedirs(log_dir, exist_ok=True)
-    timestamp: str = datetime.now().strftime('%Y-%m-%d')
-    file_name: str = f'openhands_{timestamp}.log'
-    file_handler: logging.FileHandler = logging.FileHandler(
-        os.path.join(log_dir, file_name)
-    )
+    timestamp = datetime.now().strftime('%Y-%m-%d')
+    file_name = f'openhands_{timestamp}.log'
+    file_handler = logging.FileHandler(os.path.join(log_dir, file_name))
    file_handler.setLevel(log_level)
    file_handler.setFormatter(file_formatter)
    return file_handler
@@ -209,14 +165,14 @@ openhands_logger.setLevel(current_log_level)

 if current_log_level == logging.DEBUG:
    LOG_TO_FILE = True
+    openhands_logger.info('DEBUG mode enabled.')

 openhands_logger.addHandler(get_console_handler(current_log_level))
 openhands_logger.addFilter(SensitiveDataFilter(openhands_logger.name))
 openhands_logger.propagate = False
 openhands_logger.debug('Logging initialized')

-# Define LOG_DIR after setting up the logger
-LOG_DIR: str = os.path.join(
+LOG_DIR = os.path.join(
    # parent dir of openhands/core (i.e., root of the repo)
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
    'logs',
@@ -235,101 +191,71 @@ logging.getLogger('LiteLLM Proxy').disabled = True


 class LlmFileHandler(logging.FileHandler):
-    """LLM prompt and response logging"""
+    """LLM prompt and response logging"""

-    _prompt_instances: dict[str, 'LlmFileHandler'] = {}
-    _response_instances: dict[str, 'LlmFileHandler'] = {}
+    def __init__(self, filename, mode='a', encoding='utf-8', delay=False):
+        """Initializes an instance of LlmFileHandler.

-    @classmethod
-    def get_instance(cls, sid: str, llm_log_type: LlmLogType) -> 'LlmFileHandler':
-        """Get or create an LlmFileHandler instance for the given session ID and filename."""
-        if llm_log_type == LlmLogType.PROMPT:
-            if sid not in cls._prompt_instances:
-                cls._prompt_instances[sid] = cls(sid, llm_log_type.value)
-            return cls._prompt_instances[sid]
-        elif llm_log_type == LlmLogType.RESPONSE:
-            if sid not in cls._response_instances:
-                cls._response_instances[sid] = cls(sid, llm_log_type.value)
-            return cls._response_instances[sid]
+        Args:
+            filename (str): The name of the log file.
+            mode (str, optional): The file mode. Defaults to 'a'.
+            encoding (str, optional): The file encoding. Defaults to 'utf-8'.
+            delay (bool, optional): Whether to delay file opening. Defaults to False.
+        """
+        self.filename = filename
+        self.message_counter = 1
+        if DEBUG:
+            self.session = datetime.now().strftime('%y-%m-%d_%H-%M')
        else:
-            raise ValueError(
-                f'Invalid llm_log_type: {llm_log_type}. Must be a LlmLogType enum.'
-            )
-
-    def __init__(
-        self,
-        sid: str,
-        filename: str,
-        mode: str = 'a',
-        encoding: str = 'utf-8',
-        delay: bool = True,
-    ) -> None:
-        """Initializes an instance of LlmFileHandler."""
-        self.filename: str = filename
-        self.message_counter: int = 1
-        self.log_directory: str = os.path.join(LOG_DIR, 'llm', sid)
+            self.session = 'default'
+        self.log_directory = os.path.join(LOG_DIR, 'llm', self.session)
        os.makedirs(self.log_directory, exist_ok=True)
-
        if not DEBUG:
            # Clear the log directory if not in debug mode
            for file in os.listdir(self.log_directory):
-                file_path: str = os.path.join(self.log_directory, file)
+                file_path = os.path.join(self.log_directory, file)
                try:
                    os.unlink(file_path)
                except Exception as e:
-                    openhands_logger.error(f'Failed to delete {file_path}. Reason: {e}')
-        else:
-            # In DEBUG mode, continue writing existing log directory
-            # Find the highest message counter
-            existing_files: list[str] = glob.glob(
-                os.path.join(self.log_directory, f'{self.filename}_*.log')
-            )
-            if existing_files:
-                highest_counter: int = max(
-                    int(f.split('_')[-1].split('.')[0]) for f in existing_files
-                )
-                self.message_counter = highest_counter + 1
-
-        filename_full: str = f'{self.filename}_{self.message_counter:03}.log'
-        self.baseFilename: str = os.path.join(self.log_directory, filename_full)
+                    openhands_logger.error(
+                        'Failed to delete %s. Reason: %s', file_path, e
+                    )
+        filename = f'{self.filename}_{self.message_counter:03}.log'
+        self.baseFilename = os.path.join(self.log_directory, filename)
        super().__init__(self.baseFilename, mode, encoding, delay)

-    def emit(self, record: logging.LogRecord) -> None:
-        """Emits a log record."""
+    def emit(self, record):
+        """Emits a log record.

-        filename_full: str = f'{self.filename}_{self.message_counter:03}.log'
-        self.baseFilename = os.path.join(self.log_directory, filename_full)
+        Args:
+            record (logging.LogRecord): The log record to emit.
+        """
+        filename = f'{self.filename}_{self.message_counter:03}.log'
+        self.baseFilename = os.path.join(self.log_directory, filename)
        self.stream = self._open()
        super().emit(record)
        self.stream.close()
-        openhands_logger.debug(f'Logging to {self.baseFilename}')
+        openhands_logger.debug('Logging to %s', self.baseFilename)
        self.message_counter += 1


-def _get_llm_file_handler(
-    llm_log_type: LlmLogType, sid: str, log_level: int
-) -> logging.FileHandler:
-    llm_file_handler: LlmFileHandler = LlmFileHandler.get_instance(sid, llm_log_type)
+def _get_llm_file_handler(name: str, log_level: int):
+    # The 'delay' parameter, when set to True, postpones the opening of the log file
+    # until the first log message is emitted.
+    llm_file_handler = LlmFileHandler(name, delay=True)
    llm_file_handler.setFormatter(llm_formatter)
    llm_file_handler.setLevel(log_level)
    return llm_file_handler


-def _setup_llm_logger(
-    llm_log_type: LlmLogType, sid: str, log_level: int
-) -> logging.Logger:
-    logger: logging.Logger = logging.getLogger(f'{llm_log_type.value}_{sid}')
+def _setup_llm_logger(name: str, log_level: int):
+    logger = logging.getLogger(name)
    logger.propagate = False
    logger.setLevel(log_level)
    if LOG_TO_FILE:
-        logger.addHandler(_get_llm_file_handler(llm_log_type, sid, log_level))
+        logger.addHandler(_get_llm_file_handler(name, log_level))
    return logger


-def get_llm_loggers(sid: str = 'default') -> dict[LlmLogType, logging.Logger]:
-    return {
-        LlmLogType.PROMPT: _setup_llm_logger(LlmLogType.PROMPT, sid, current_log_level),
-        LlmLogType.RESPONSE: _setup_llm_logger(
-            LlmLogType.RESPONSE, sid, current_log_level
-        ),
-    }
+llm_prompt_logger = _setup_llm_logger('prompt', current_log_level)
+llm_response_logger = _setup_llm_logger('response', current_log_level)
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -125,12 +125,12 @@ async def run_controller(
    initial_state = None
    if config.enable_cli_session:
        try:
+            logger.info(f'Restoring agent state from cli session {event_stream.sid}')
            initial_state = State.restore_from_session(
                event_stream.sid, event_stream.file_store
            )
-            logger.debug(f'Restored agent state from cli session {event_stream.sid}')
        except Exception as e:
-            logger.debug(f'Cannot restore state: {e}')
+            logger.info(f'Error restoring state: {e}')

    # init controller with this initial state
    controller = AgentController(
@@ -143,6 +143,9 @@ async def run_controller(
        headless_mode=headless_mode,
    )

+    if controller is not None:
+        controller.agent_task = asyncio.create_task(controller.start_step_loop())
+
    assert isinstance(task_str, str), f'task_str must be a string, got {type(task_str)}'
    # Logging
    logger.info(
--- a/openhands/core/message.py
+++ b/openhands/core/message.py
@@ -50,6 +50,8 @@ class ImageContent(Content):
 class Message(BaseModel):
    role: Literal['user', 'system', 'assistant']
    content: list[TextContent | ImageContent] = Field(default=list)
+    cache_enabled: bool = False
+    vision_enabled: bool = False

    @property
    def contains_image(self) -> bool:
@@ -58,23 +60,22 @@ class Message(BaseModel):
    @model_serializer
    def serialize_model(self) -> dict:
        content: list[dict] | str
-        if self.role == 'system':
-            # For system role, concatenate all text content into a single string
-            content = '\n'.join(
-                item.text for item in self.content if isinstance(item, TextContent)
-            )
-        elif self.role == 'assistant' and not self.contains_image:
-            # For assistant role without vision, concatenate all text content into a single string
-            content = '\n'.join(
-                item.text for item in self.content if isinstance(item, TextContent)
-            )
-        else:
-            # For user role or assistant role with vision enabled, serialize each content item
+        # two kinds of serializer:
+        # 1. vision serializer: when prompt caching or vision is enabled
+        # 2. single text serializer: for other cases
+        # remove this when liteLLM or providers support this format translation
+        if self.cache_enabled or self.vision_enabled:
+            # when prompt caching or vision is enabled, use vision serializer
            content = []
            for item in self.content:
                if isinstance(item, TextContent):
                    content.append(item.model_dump())
                elif isinstance(item, ImageContent):
                    content.extend(item.model_dump())
-
+        else:
+            # for other cases, concatenate all text content
+            # into a single string per message
+            content = '\n'.join(
+                item.text for item in self.content if isinstance(item, TextContent)
+            )
        return {'content': content, 'role': self.role}
--- a/openhands/linter/init.py
+++ b/openhands/linter/init.py
@@ -0,0 +1,9 @@
+"""Linter module for OpenHands.
+
+Part of this Linter module is adapted from Aider (Apache 2.0 License, [original code](https://github.com/paul-gauthier/aider/blob/main/aider/linter.py)). Please see the [original repository](https://github.com/paul-gauthier/aider) for more information.
+"""
+
+from openhands.linter.base import LintResult
+from openhands.linter.linter import DefaultLinter
+
+__all__ = ['DefaultLinter', 'LintResult']
--- a/openhands/linter/base.py
+++ b/openhands/linter/base.py
@@ -0,0 +1,79 @@
+from abc import ABC, abstractmethod
+
+from pydantic import BaseModel
+
+
+class LintResult(BaseModel):
+    """One problem reported by a linter, pinned to a position inside a file."""
+
+    file: str
+    line: int  # 1-indexed
+    column: int  # 1-indexed
+    message: str
+
+    def visualize(self, half_window: int = 3) -> str:
+        """Render the offending line with numbered context and a caret marker.
+
+        Args:
+            half_window: How many context lines to show on each side of the error line.
+
+        Returns:
+            The numbered context lines joined by newlines. The error line is wrapped
+            in ANSI red and followed by a hint line carrying a caret and the message.
+        """
+        with open(self.file, 'r') as f:
+            raw_lines = f.readlines()
+
+        # Gutter width: number of digits needed for the largest line number.
+        gutter = len(str(len(raw_lines)))
+        numbered = []
+        for number, text in enumerate(raw_lines, start=1):
+            numbered.append(f'{number:>{gutter}}|{text.rstrip()}')
+
+        # The reported line has to exist in the file.
+        assert self.line <= len(numbered) and self.line > 0
+        target = self.line - 1
+        first = max(0, target - half_window)
+        last = min(len(numbered), target + half_window + 1)
+        window = numbered[first:last]
+
+        # Hint line: pad past the gutter, then `column` spaces, so the caret lands
+        # under the 1-indexed column of the (gutter + '|')-prefixed error line.
+        hint = gutter * ' ' + ' ' * (self.column) + '^' + ' ERROR HERE: ' + self.message
+        position = target - first
+        window[position] = f'\033[91m{window[position]}\033[0m' + '\n' + hint
+        return '\n'.join(window)
+
+
+class LinterException(Exception):
+    """Common ancestor for errors raised by the linter package."""
+
+
+class BaseLinter(ABC):
+    """Abstract interface shared by every linter.
+
+    A concrete linter declares which file extensions it handles and turns a file
+    into a list of (parsed) lint results.
+    """
+
+    # Text encoding associated with this linter.
+    encoding: str = 'utf-8'
+
+    @property
+    @abstractmethod
+    def supported_extensions(self) -> list[str]:
+        """File extensions this linter can handle, for example .py or .tsx."""
+        return list()
+
+    @abstractmethod
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Lint a single file and return the problems found.
+
+        Args:
+            file_path: Path of the file to lint. Required to be absolute.
+        """
+        ...
--- a/openhands/linter/languages/python.py
+++ b/openhands/linter/languages/python.py
@@ -0,0 +1,77 @@
+from typing import List
+
+from openhands.linter.base import BaseLinter, LintResult
+from openhands.linter.utils import run_cmd
+
+
+def python_compile_lint(fname: str) -> list[LintResult]:
+    """Check a Python file for syntax errors by compiling it with the builtin `compile`.
+
+    Args:
+        fname: Path of the Python source file to check.
+
+    Returns:
+        An empty list when the file compiles, otherwise a single-element list
+        describing the SyntaxError.
+    """
+    try:
+        with open(fname, 'r') as f:
+            code = f.read()
+        compile(code, fname, 'exec')
+        return []
+    except SyntaxError as err:
+        # `end_lineno` / `end_offset` exist on every SyntaxError since Python 3.10
+        # but may be None, so a getattr default alone is not enough: fall back to
+        # the start position, and finally to 1, so LintResult always gets ints.
+        err_lineno = getattr(err, 'end_lineno', None) or err.lineno or 1
+        err_offset = getattr(err, 'end_offset', None)
+        if err_offset is None or err_offset < 0:
+            err_offset = err.offset
+        return [
+            LintResult(
+                file=fname,
+                line=err_lineno,
+                column=err_offset or 1,
+                message=err.msg or str(err),
+            )
+        ]
+
+
+def flake_lint(filepath: str) -> list[LintResult]:
+    """Run flake8 on a file, restricted to a fixed set of fatal error codes.
+
+    Args:
+        filepath: Path of the Python file to check.
+
+    Returns:
+        One LintResult per diagnostic line flake8 printed. Empty when flake8 is
+        not installed, when it exits successfully, or when it produced no
+        parseable diagnostics.
+    """
+    fatal = 'F821,F822,F831,E112,E113,E999,E902'
+    flake8_cmd = f'flake8 --select={fatal} --isolated {filepath}'
+
+    try:
+        cmd_outputs = run_cmd(flake8_cmd)
+    except FileNotFoundError:
+        # flake8 executable is not available
+        return []
+    results: list[LintResult] = []
+    if not cmd_outputs:
+        return results
+
+    path_prefix = f'{filepath}:'
+    for line in cmd_outputs.splitlines():
+        # Expected shape: <path>:<line>:<col>: <code> <message>
+        # Strip the path verbatim when flake8 echoes it back, so that colons
+        # inside the path itself cannot shift the fields.
+        if line.startswith(path_prefix):
+            rest = line[len(path_prefix) :]
+        else:
+            rest = line.partition(':')[2]
+        # Split only twice: the message may legitimately contain further colons.
+        fields = rest.split(':', 2)
+        if len(fields) < 3:
+            continue
+        try:
+            lineno = int(fields[0])
+            column = int(fields[1])
+        except ValueError:
+            # Not a diagnostic line (e.g. a usage error printed by flake8 itself).
+            continue
+        results.append(
+            LintResult(
+                file=filepath,
+                line=lineno,
+                column=column,
+                message=fields[2].strip(),
+            )
+        )
+    return results
+
+
+class PythonLinter(BaseLinter):
+    """Linter for Python files: flake8 first, builtin `compile` as a fallback."""
+
+    @property
+    def supported_extensions(self) -> List[str]:
+        """File extensions handled by this linter."""
+        return ['.py']
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Lint a Python file.
+
+        The compile-based check only runs when flake8 reported nothing.
+
+        Args:
+            file_path: Path of the Python file to lint.
+        """
+        error = flake_lint(file_path)
+        if not error:
+            error = python_compile_lint(file_path)
+        return error
+
+    def compile_lint(self, file_path: str, code: str) -> List[LintResult]:
+        """Compile already-loaded source text and report a SyntaxError, if any.
+
+        Args:
+            file_path: File name recorded in the result and passed to `compile`.
+            code: Source text to compile.
+
+        Returns:
+            An empty list when `code` compiles, otherwise one LintResult.
+        """
+        try:
+            compile(code, file_path, 'exec')
+            return []
+        except SyntaxError as e:
+            # `lineno` / `offset` may be None on a SyntaxError while LintResult
+            # requires ints, so default to position 1. LintResult declares no
+            # `rule` field, so none is passed.
+            return [
+                LintResult(
+                    file=file_path,
+                    line=e.lineno or 1,
+                    column=e.offset or 1,
+                    message=str(e),
+                )
+            ]
--- a/openhands/linter/languages/treesitter.py
+++ b/openhands/linter/languages/treesitter.py
@@ -0,0 +1,74 @@
+import warnings
+
+from grep_ast import TreeContext, filename_to_lang
+from grep_ast.parsers import PARSERS
+from tree_sitter_languages import get_parser
+
+from openhands.linter.base import BaseLinter, LintResult
+
+# tree_sitter is throwing a FutureWarning
+warnings.simplefilter('ignore', category=FutureWarning)
+
+
+def tree_context(fname, code, line_nums):
+    """Format `code` through grep_ast's TreeContext with the given lines marked.
+
+    Args:
+        fname: File name handed to TreeContext.
+        code: Source text handed to TreeContext.
+        line_nums: Iterable of line numbers to register as lines of interest.
+
+    Returns:
+        Whatever `TreeContext.format()` produces for that configuration.
+    """
+    renderer = TreeContext(
+        fname,
+        code,
+        color=False,
+        line_number=True,
+        child_context=False,
+        last_line=False,
+        margin=0,
+        mark_lois=True,
+        loi_pad=3,
+        # header_max=30,
+        show_top_of_file_parent_scope=False,
+    )
+    # Duplicates are collapsed before registration.
+    renderer.add_lines_of_interest(set(line_nums))
+    renderer.add_context()
+    return renderer.format()
+
+
+def traverse_tree(node):
+    """Collect every error or missing node in the tree rooted at `node`.
+
+    The walk is pre-order (a node before its children, children left to right)
+    and uses an explicit stack instead of recursion, so deeply nested trees
+    cannot exhaust Python's recursion limit.
+
+    Args:
+        node: Root node; it and its descendants must expose `type`, `is_missing`,
+            `start_point` (a 0-indexed (row, column) pair) and `children`.
+
+    Returns:
+        A list of `(line, column, description)` tuples with 1-indexed positions.
+    """
+    errors = []
+    pending = [node]
+    while pending:
+        current = pending.pop()
+        if current.type == 'ERROR' or current.is_missing:
+            line_no = current.start_point[0] + 1
+            col_no = current.start_point[1] + 1
+            error_type = 'Missing node' if current.is_missing else 'Syntax error'
+            errors.append((line_no, col_no, error_type))
+        # Push children reversed so the leftmost child is popped first.
+        pending.extend(reversed(current.children))
+
+    return errors
+
+
+class TreesitterBasicLinter(BaseLinter):
+    """Syntax-only linter that reports ERROR / missing nodes found by tree-sitter."""
+
+    @property
+    def supported_extensions(self) -> list[str]:
+        """The keys of grep_ast's PARSERS table."""
+        return list(PARSERS.keys())
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Use tree-sitter to look for syntax errors.
+
+        Args:
+            file_path: Path of the file to parse.
+
+        Returns:
+            One LintResult per error or missing node; empty when the language
+            cannot be determined from the file name or the parse is clean.
+        """
+        lang = filename_to_lang(file_path)
+        if not lang:
+            return []
+        parser = get_parser(lang)
+        # Decode with the linter's declared encoding rather than the locale
+        # default, so results do not depend on the host environment.
+        with open(file_path, 'r', encoding=self.encoding) as f:
+            code = f.read()
+        tree = parser.parse(bytes(code, 'utf-8'))
+        errors = traverse_tree(tree.root_node)
+        if not errors:
+            return []
+        return [
+            LintResult(
+                file=file_path,
+                line=int(line),
+                column=int(col),
+                message=error_details,
+            )
+            for line, col, error_details in errors
+        ]
--- a/openhands/linter/linter.py
+++ b/openhands/linter/linter.py
@@ -0,0 +1,35 @@
+import os
+from collections import defaultdict
+
+from openhands.linter.base import BaseLinter, LinterException, LintResult
+from openhands.linter.languages.python import PythonLinter
+from openhands.linter.languages.treesitter import TreesitterBasicLinter
+
+
+class DefaultLinter(BaseLinter):
+    """Dispatches a file to the linters registered for its extension."""
+
+    def __init__(self):
+        registry: dict[str, list[BaseLinter]] = defaultdict(list)
+        registry['.py'] = [PythonLinter()]
+
+        # The tree-sitter linter is queued after any dedicated linter, as a fallback.
+        self.basic_linter = TreesitterBasicLinter()
+        for ext in self.basic_linter.supported_extensions:
+            registry[ext].append(self.basic_linter)
+
+        self.linters = registry
+        self._supported_extensions = list(registry.keys())
+
+    @property
+    def supported_extensions(self) -> list[str]:
+        """Every extension that has at least one registered linter."""
+        return self._supported_extensions
+
+    def lint(self, file_path: str) -> list[LintResult]:
+        """Lint a file with the linters registered for its extension.
+
+        Args:
+            file_path: Absolute path of the file to lint.
+
+        Returns:
+            The first non-empty result, trying linters in registration order;
+            an empty list when none reports anything or the extension is unknown.
+
+        Raises:
+            LinterException: If `file_path` is not absolute.
+        """
+        if not os.path.isabs(file_path):
+            raise LinterException(f'File path {file_path} is not an absolute path')
+        _, extension = os.path.splitext(file_path)
+
+        # Earlier linters have higher priority: stop at the first one with findings.
+        for candidate in self.linters.get(extension, []):
+            findings = candidate.lint(file_path)
+            if findings:
+                return findings
+        return []
--- a/openhands/linter/utils/init.py
+++ b/openhands/linter/utils/init.py
@@ -0,0 +1,3 @@
+from .cmd import run_cmd, check_tool_installed
+
+__all__ = ['run_cmd', 'check_tool_installed']
--- a/openhands/linter/utils/cmd.py
+++ b/openhands/linter/utils/cmd.py
@@ -0,0 +1,36 @@
+import subprocess
+import os
+
+def run_cmd(cmd: str, cwd: str | None = None) -> str | None:
+    """Run a command and report its output only when it fails.
+
+    The command string is split on whitespace (no shell is involved) and stderr
+    is merged into stdout.
+
+    Args:
+        cmd: Command line to execute.
+        cwd: Working directory for the child process; None keeps the current one.
+
+    Returns:
+        None when the exit code is 0, otherwise the captured output.
+    """
+    completed = subprocess.run(
+        cmd.split(),
+        cwd=cwd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        encoding='utf-8',
+        errors='replace',
+    )
+    return completed.stdout if completed.returncode != 0 else None
+
+
+def check_tool_installed(tool_name: str) -> bool:
+    """Report whether `<tool_name> --version` can be run and exits successfully.
+
+    Args:
+        tool_name: Executable name or path to probe.
+
+    Returns:
+        True when the probe exits with status 0; False when the executable is
+        not found or exits with a non-zero status.
+    """
+    probe = [tool_name, '--version']
+    try:
+        subprocess.run(
+            probe,
+            check=True,
+            cwd=os.getcwd(),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+    else:
+        return True
--- a/openhands/llm/init.py
+++ b/openhands/llm/init.py
@@ -0,0 +1,5 @@
+from openhands.llm.async_llm import AsyncLLM
+from openhands.llm.llm import LLM
+from openhands.llm.streaming_llm import StreamingLLM
+
+__all__ = ['LLM', 'AsyncLLM', 'StreamingLLM']
--- a/openhands/llm/async_llm.py
+++ b/openhands/llm/async_llm.py
@@ -0,0 +1,117 @@
+import asyncio
+from functools import partial
+from typing import Any
+
+from litellm import completion as litellm_acompletion
+
+from openhands.core.exceptions import UserCancelledError
+from openhands.core.logger import openhands_logger as logger
+from openhands.llm.llm import LLM, LLM_RETRY_EXCEPTIONS
+from openhands.runtime.utils.shutdown_listener import should_continue
+
+
+class AsyncLLM(LLM):
+    """LLM variant whose completion call is awaitable.
+
+    The async completion is wrapped with the configured retry policy and with
+    prompt/response logging. While a request is in flight, a background task
+    polls `config.on_cancel_requested_fn` (when present).
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Bind the configured parameters once; callers only supply the messages.
+        self._async_completion = partial(
+            self._call_acompletion,
+            model=self.config.model,
+            api_key=self.config.api_key,
+            base_url=self.config.base_url,
+            api_version=self.config.api_version,
+            custom_llm_provider=self.config.custom_llm_provider,
+            max_tokens=self.config.max_output_tokens,
+            timeout=self.config.timeout,
+            temperature=self.config.temperature,
+            top_p=self.config.top_p,
+            drop_params=self.config.drop_params,
+        )
+
+        # Keep a handle on the undecorated partial; the wrapper below calls it.
+        async_completion_unwrapped = self._async_completion
+
+        @self.retry_decorator(
+            num_retries=self.config.num_retries,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
+            retry_min_wait=self.config.retry_min_wait,
+            retry_max_wait=self.config.retry_max_wait,
+            retry_multiplier=self.config.retry_multiplier,
+        )
+        async def async_completion_wrapper(*args, **kwargs):
+            """Log the prompt, await the completion, log and post-process the response.
+
+            Raises:
+                ValueError: If no messages were supplied.
+                UserCancelledError: If the cancellation poller detected a cancel request.
+            """
+            messages: list[dict[str, Any]] | dict[str, Any] = []
+
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            # see llm.py for more details
+            if len(args) > 1:
+                # args[0] would be the model, which the partial already binds
+                messages = args[1]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
+                messages = kwargs['messages']
+
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
+
+            # if we have no messages, something went very wrong
+            if not messages:
+                raise ValueError(
+                    'The messages list is empty. At least one message is required.'
+                )
+
+            self.log_prompt(messages)
+
+            async def check_stopped():
+                """Poll the cancel callback every 0.1s until shutdown is requested."""
+                while should_continue():
+                    if (
+                        hasattr(self.config, 'on_cancel_requested_fn')
+                        and self.config.on_cancel_requested_fn is not None
+                        and await self.config.on_cancel_requested_fn()
+                    ):
+                        raise UserCancelledError('LLM request cancelled by user')
+                    await asyncio.sleep(0.1)
+
+            stop_check_task = asyncio.create_task(check_stopped())
+
+            try:
+                # Directly call and await the bound async completion
+                resp = await async_completion_unwrapped(*args, **kwargs)
+
+                message_back = resp['choices'][0]['message']['content']
+                self.log_response(message_back)
+                self._post_completion(resp)
+
+                # We do not support streaming in this method, thus return resp
+                return resp
+
+            except UserCancelledError:
+                logger.info('LLM request cancelled by user.')
+                raise
+            except Exception as e:
+                logger.error(f'Completion Error occurred:\n{e}')
+                raise
+
+            finally:
+                # NOTE(review): presumably gives the poller one last interval to
+                # observe a pending cancel request before it is torn down.
+                await asyncio.sleep(0.1)
+                stop_check_task.cancel()
+                try:
+                    # If the poller already raised UserCancelledError, awaiting it
+                    # here re-raises that error to the caller.
+                    await stop_check_task
+                except asyncio.CancelledError:
+                    pass
+
+        self._async_completion = async_completion_wrapper  # type: ignore
+
+    async def _call_acompletion(self, *args, **kwargs):
+        """Await litellm's asynchronous completion with the given arguments.
+
+        Kept as a method so it can be bound with `partial` in `__init__`.
+        """
+        # litellm's `completion` is synchronous and its result cannot be awaited;
+        # the coroutine entry point is `acompletion`.
+        from litellm import acompletion
+
+        return await acompletion(*args, **kwargs)
+
+    @property
+    def async_completion(self):
+        """The retry-wrapped, logging async completion callable built in `__init__`."""
+        return self._async_completion
--- a/openhands/llm/bedrock.py
+++ b/openhands/llm/bedrock.py
@@ -21,8 +21,9 @@ def list_foundation_models(
        return ['bedrock/' + model['modelId'] for model in model_summaries]
    except Exception as err:
        logger.warning(
-            f'{err}. Please config AWS_REGION_NAME AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY'
+            '%s. Please config AWS_REGION_NAME AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY'
            ' if you want use bedrock model.',
+            err,
        )
        return []

--- a/openhands/llm/debug_mixin.py
+++ b/openhands/llm/debug_mixin.py
@@ -0,0 +1,51 @@
+from typing import Any
+
+from openhands.core.logger import llm_prompt_logger, llm_response_logger
+from openhands.core.logger import openhands_logger as logger
+
+MESSAGE_SEPARATOR = '\n\n----------\n\n'
+
+
+class DebugMixin:
+    """Mixin that writes LLM prompts and responses to the dedicated debug loggers.
+
+    The host class must provide `vision_is_active()`.
+    """
+
+    def log_prompt(self, messages: list[dict[str, Any]] | dict[str, Any]):
+        """Log every message with non-empty content, joined by MESSAGE_SEPARATOR.
+
+        Args:
+            messages: A single message dict or a list of them.
+        """
+        if not messages:
+            logger.debug('No completion messages!')
+            return
+
+        if not isinstance(messages, list):
+            messages = [messages]
+        rendered = [
+            self._format_message_content(entry)
+            for entry in messages
+            if entry['content']
+        ]
+        prompt_text = MESSAGE_SEPARATOR.join(rendered)
+
+        if not prompt_text:
+            logger.debug('No completion messages!')
+        else:
+            llm_prompt_logger.debug(prompt_text)
+
+    def log_response(self, message_back: str):
+        """Log the response text; empty responses are skipped."""
+        if not message_back:
+            return
+        llm_response_logger.debug(message_back)
+
+    def _format_message_content(self, message: dict[str, Any]):
+        """Turn a message's content into one string, one line per list element."""
+        body = message['content']
+        if not isinstance(body, list):
+            return str(body)
+        return '\n'.join(self._format_content_element(part) for part in body)
+
+    def _format_content_element(self, element: dict[str, Any]):
+        """Extract the text, or the image URL when vision is active, from an element.
+
+        Anything that matches neither shape falls back to `str(element)`.
+        """
+        if not isinstance(element, dict):
+            return str(element)
+        if 'text' in element:
+            return element['text']
+        has_image_url = (
+            self.vision_is_active()
+            and 'image_url' in element
+            and 'url' in element['image_url']
+        )
+        if has_image_url:
+            return element['image_url']['url']
+        return str(element)
+
+    # This method should be implemented in the class that uses DebugMixin
+    def vision_is_active(self):
+        """Tell whether vision is active; the host class must override this."""
+        raise NotImplementedError
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -1,61 +1,54 @@
-import asyncio
 import copy
-import os
 import time
 import warnings
 from functools import partial
 from typing import Any

 from openhands.core.config import LLMConfig
-from openhands.runtime.utils.shutdown_listener import should_continue

 with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    import litellm
+from litellm import ModelInfo
 from litellm import completion as litellm_completion
 from litellm import completion_cost as litellm_completion_cost
 from litellm.exceptions import (
    APIConnectionError,
-    ContentPolicyViolationError,
    InternalServerError,
-    NotFoundError,
-    OpenAIError,
    RateLimitError,
    ServiceUnavailableError,
 )
-from litellm.types.utils import CostPerToken
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    retry_if_not_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
+from litellm.types.utils import CostPerToken, ModelResponse, Usage

-from openhands.core.exceptions import (
-    LLMResponseError,
-    OperationCancelled,
-    UserCancelledError,
-)
-from openhands.core.logger import get_llm_loggers
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.core.metrics import Metrics
-from openhands.runtime.utils.shutdown_listener import should_exit
+from openhands.llm.debug_mixin import DebugMixin
+from openhands.llm.retry_mixin import RetryMixin

 __all__ = ['LLM']

-message_separator = '\n\n----------\n\n'
+# tuple of exceptions to retry on
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
+    APIConnectionError,
+    InternalServerError,
+    RateLimitError,
+    ServiceUnavailableError,
+)

-cache_prompting_supported_models = [
+# cache prompt supporting models
+# remove this list once gemini and deepseek are supported as well
+CACHE_PROMPT_SUPPORTED_MODELS = [
    'claude-3-5-sonnet-20240620',
    'claude-3-haiku-20240307',
+    'claude-3-opus-20240229',
+    'anthropic/claude-3-opus-20240229',
+    'anthropic/claude-3-haiku-20240307',
+    'anthropic/claude-3-5-sonnet-20240620',
 ]

-llm_prompt_logger, llm_response_logger = get_llm_loggers().values()

-
-class LLM:
+class LLM(RetryMixin, DebugMixin):
    """The LLM class represents a Language Model instance.

    Attributes:
@@ -72,25 +65,20 @@ class LLM:
        Passing simple parameters always overrides config.

        Args:
-            config: The LLM configuration
+            config: The LLM configuration.
+            metrics: The metrics to use.
        """
-        self.metrics = metrics if metrics is not None else Metrics()
-        self.cost_metric_supported = True
-        self.config = copy.deepcopy(config)
-
-        os.environ['OR_SITE_URL'] = self.config.openrouter_site_url
-        os.environ['OR_APP_NAME'] = self.config.openrouter_app_name
+        self.metrics: Metrics = metrics if metrics is not None else Metrics()
+        self.cost_metric_supported: bool = True
+        self.config: LLMConfig = copy.deepcopy(config)

        # list of LLM completions (for logging purposes). Each completion is a dict with the following keys:
        # - 'messages': list of messages
        # - 'response': response from the LLM
        self.llm_completions: list[dict[str, Any]] = []

-        # Set up config attributes with default values to prevent AttributeError
-        LLMConfig.set_missing_attributes(self.config)
-
        # litellm actually uses base Exception here for unknown model
-        self.model_info = None
+        self.model_info: ModelInfo | None = None
        try:
            if self.config.model.startswith('openrouter'):
                self.model_info = litellm.get_model_info(self.config.model)
@@ -102,15 +90,6 @@ class LLM:
        except Exception as e:
            logger.warning(f'Could not get model info for {config.model}:\n{e}')

-        # Tuple of exceptions to retry on
-        self.retry_exceptions = (
-            APIConnectionError,
-            ContentPolicyViolationError,
-            InternalServerError,
-            OpenAIError,
-            RateLimitError,
-        )
-
        # Set the max tokens in an LM-specific way if not set
        if self.config.max_input_tokens is None:
            if (
@@ -138,30 +117,6 @@ class LLM:
                ):
                    self.config.max_output_tokens = self.model_info['max_tokens']

-        # This only seems to work with Google as the provider, not with OpenRouter!
-        gemini_safety_settings = (
-            [
-                {
-                    'category': 'HARM_CATEGORY_HARASSMENT',
-                    'threshold': 'BLOCK_NONE',
-                },
-                {
-                    'category': 'HARM_CATEGORY_HATE_SPEECH',
-                    'threshold': 'BLOCK_NONE',
-                },
-                {
-                    'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
-                    'threshold': 'BLOCK_NONE',
-                },
-                {
-                    'category': 'HARM_CATEGORY_DANGEROUS_CONTENT',
-                    'threshold': 'BLOCK_NONE',
-                },
-            ]
-            if self.config.model.lower().startswith('gemini')
-            else None
-        )
-
        self._completion = partial(
            litellm_completion,
            model=self.config.model,
@@ -174,84 +129,52 @@ class LLM:
            temperature=self.config.temperature,
            top_p=self.config.top_p,
            drop_params=self.config.drop_params,
-            **(
-                {'safety_settings': gemini_safety_settings}
-                if gemini_safety_settings is not None
-                else {}
-            ),
        )

        if self.vision_is_active():
            logger.debug('LLM: model has vision enabled')
+        if self.is_caching_prompt_active():
+            logger.debug('LLM: caching prompt enabled')

        completion_unwrapped = self._completion

-        def log_retry_attempt(retry_state):
-            """With before_sleep, this is called before `custom_completion_wait` and
-            ONLY if the retry is triggered by an exception."""
-            if should_exit():
-                raise OperationCancelled(
-                    'Operation cancelled.'
-                )  # exits the @retry loop
-            exception = retry_state.outcome.exception()
-            logger.error(
-                f'{exception}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
-                exc_info=False,
-            )
-
-        def custom_completion_wait(retry_state):
-            """Custom wait function for litellm completion."""
-            if not retry_state:
-                return 0
-            exception = retry_state.outcome.exception() if retry_state.outcome else None
-            if exception is None:
-                return 0
-
-            min_wait_time = self.config.retry_min_wait
-            max_wait_time = self.config.retry_max_wait
-
-            # for rate limit errors, wait 1 minute by default, max 4 minutes between retries
-            exception_type = type(exception).__name__
-            logger.error(f'\nexception_type: {exception_type}\n')
-
-            if exception_type == 'RateLimitError':
-                min_wait_time = 60
-                max_wait_time = 240
-            elif exception_type == 'BadRequestError' and exception.response:
-                # this should give us the burried, actual error message from
-                # the LLM model.
-                logger.error(f'\n\nBadRequestError: {exception.response}\n\n')
-
-            # Return the wait time using exponential backoff
-            exponential_wait = wait_exponential(
-                multiplier=self.config.retry_multiplier,
-                min=min_wait_time,
-                max=max_wait_time,
-            )
-
-            # Call the exponential wait function with retry_state to get the actual wait time
-            return exponential_wait(retry_state)
-
-        @retry(
-            before_sleep=log_retry_attempt,
-            stop=stop_after_attempt(self.config.num_retries),
-            reraise=True,
-            retry=(
-                retry_if_exception_type(self.retry_exceptions)
-                & retry_if_not_exception_type(OperationCancelled)
-            ),
-            wait=custom_completion_wait,
+        @self.retry_decorator(
+            num_retries=self.config.num_retries,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
+            retry_min_wait=self.config.retry_min_wait,
+            retry_max_wait=self.config.retry_max_wait,
+            retry_multiplier=self.config.retry_multiplier,
        )
        def wrapper(*args, **kwargs):
            """Wrapper for the litellm completion function. Logs the input and output of the completion function."""
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
-                messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
+            messages: list[dict[str, Any]] | dict[str, Any] = []

-            # this serves to prevent empty messages and logging the messages
-            debug_message = self._get_debug_message(messages)
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            if len(args) > 1:
+                # ignore the first argument if it's provided (it would be the model)
+                # design wise: we don't allow overriding the configured values
+                # implementation wise: the partial function set the model as a kwarg already
+                # as well as other kwargs
+                messages = args[1] if len(args) > 1 else args[0]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
+                messages = kwargs['messages']
+
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
+
+            # if we have no messages, something went very wrong
+            if not messages:
+                raise ValueError(
+                    'The messages list is empty. At least one message is required.'
+                )
+
+            # log the entire LLM prompt
+            self.log_prompt(messages)

            if self.is_caching_prompt_active():
                # Anthropic-specific prompt caching
@@ -260,239 +183,31 @@ class LLM:
                        'anthropic-beta': 'prompt-caching-2024-07-31',
                    }

-            # skip if messages is empty (thus debug_message is empty)
-            if debug_message:
-                llm_prompt_logger.debug(debug_message)
-                resp = completion_unwrapped(*args, **kwargs)
-            else:
-                logger.debug('No completion messages!')
-                resp = {'choices': [{'message': {'content': ''}}]}
+            # we don't support streaming here, thus we get a ModelResponse
+            resp: ModelResponse = completion_unwrapped(*args, **kwargs)

+            # log for evals or other scripts that need the raw completion
            if self.config.log_completions:
                self.llm_completions.append(
                    {
                        'messages': messages,
                        'response': resp,
                        'timestamp': time.time(),
-                        'cost': self.completion_cost(resp),
+                        'cost': self._completion_cost(resp),
                    }
                )

-            # log the response
-            message_back = resp['choices'][0]['message']['content']
-            if message_back:
-                llm_response_logger.debug(message_back)
+            message_back: str = resp['choices'][0]['message']['content']

-                # post-process to log costs
-                self._post_completion(resp)
+            # log the LLM response
+            self.log_response(message_back)
+
+            # post-process the response
+            self._post_completion(resp)

            return resp

-        self._completion = wrapper  # type: ignore
-
-        # Async version
-        self._async_completion = partial(
-            self._call_acompletion,
-            model=self.config.model,
-            api_key=self.config.api_key,
-            base_url=self.config.base_url,
-            api_version=self.config.api_version,
-            custom_llm_provider=self.config.custom_llm_provider,
-            max_tokens=self.config.max_output_tokens,
-            timeout=self.config.timeout,
-            temperature=self.config.temperature,
-            top_p=self.config.top_p,
-            drop_params=self.config.drop_params,
-            **(
-                {'safety_settings': gemini_safety_settings}
-                if gemini_safety_settings is not None
-                else {}
-            ),
-        )
-
-        async_completion_unwrapped = self._async_completion
-
-        @retry(
-            before_sleep=log_retry_attempt,
-            stop=stop_after_attempt(self.config.num_retries),
-            reraise=True,
-            retry=(
-                retry_if_exception_type(self.retry_exceptions)
-                & retry_if_not_exception_type(OperationCancelled)
-            ),
-            wait=custom_completion_wait,
-        )
-        async def async_completion_wrapper(*args, **kwargs):
-            """Async wrapper for the litellm acompletion function."""
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
-                messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
-
-            # this serves to prevent empty messages and logging the messages
-            debug_message = self._get_debug_message(messages)
-
-            async def check_stopped():
-                while should_continue():
-                    if (
-                        hasattr(self.config, 'on_cancel_requested_fn')
-                        and self.config.on_cancel_requested_fn is not None
-                        and await self.config.on_cancel_requested_fn()
-                    ):
-                        raise UserCancelledError('LLM request cancelled by user')
-                    await asyncio.sleep(0.1)
-
-            stop_check_task = asyncio.create_task(check_stopped())
-
-            try:
-                # Directly call and await litellm_acompletion
-                if debug_message:
-                    llm_prompt_logger.debug(debug_message)
-                    resp = await async_completion_unwrapped(*args, **kwargs)
-                else:
-                    logger.debug('No completion messages!')
-                    resp = {'choices': [{'message': {'content': ''}}]}
-
-                # skip if messages is empty (thus debug_message is empty)
-                if debug_message:
-                    message_back = resp['choices'][0]['message']['content']
-                    llm_response_logger.debug(message_back)
-                else:
-                    resp = {'choices': [{'message': {'content': ''}}]}
-                self._post_completion(resp)
-
-                # We do not support streaming in this method, thus return resp
-                return resp
-
-            except UserCancelledError:
-                logger.info('LLM request cancelled by user.')
-                raise
-            except (
-                APIConnectionError,
-                ContentPolicyViolationError,
-                InternalServerError,
-                NotFoundError,
-                OpenAIError,
-                RateLimitError,
-                ServiceUnavailableError,
-            ) as e:
-                logger.error(f'Completion Error occurred:\n{e}')
-                raise
-
-            finally:
-                await asyncio.sleep(0.1)
-                stop_check_task.cancel()
-                try:
-                    await stop_check_task
-                except asyncio.CancelledError:
-                    pass
-
-        @retry(
-            before_sleep=log_retry_attempt,
-            stop=stop_after_attempt(self.config.num_retries),
-            reraise=True,
-            retry=(
-                retry_if_exception_type(self.retry_exceptions)
-                & retry_if_not_exception_type(OperationCancelled)
-            ),
-            wait=custom_completion_wait,
-        )
-        async def async_acompletion_stream_wrapper(*args, **kwargs):
-            """Async wrapper for the litellm acompletion with streaming function."""
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
-                messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
-
-            # log the prompt
-            debug_message = ''
-            for message in messages:
-                debug_message += message_separator + message['content']
-            llm_prompt_logger.debug(debug_message)
-
-            try:
-                # Directly call and await litellm_acompletion
-                resp = await async_completion_unwrapped(*args, **kwargs)
-
-                # For streaming we iterate over the chunks
-                async for chunk in resp:
-                    # Check for cancellation before yielding the chunk
-                    if (
-                        hasattr(self.config, 'on_cancel_requested_fn')
-                        and self.config.on_cancel_requested_fn is not None
-                        and await self.config.on_cancel_requested_fn()
-                    ):
-                        raise UserCancelledError(
-                            'LLM request cancelled due to CANCELLED state'
-                        )
-                    # with streaming, it is "delta", not "message"!
-                    message_back = chunk['choices'][0]['delta']['content']
-                    llm_response_logger.debug(message_back)
-                    self._post_completion(chunk)
-
-                    yield chunk
-
-            except UserCancelledError:
-                logger.info('LLM request cancelled by user.')
-                raise
-            except (
-                APIConnectionError,
-                ContentPolicyViolationError,
-                InternalServerError,
-                NotFoundError,
-                OpenAIError,
-                RateLimitError,
-                ServiceUnavailableError,
-            ) as e:
-                logger.error(f'Completion Error occurred:\n{e}')
-                raise
-
-            finally:
-                if kwargs.get('stream', False):
-                    await asyncio.sleep(0.1)
-
-        self._async_completion = async_completion_wrapper  # type: ignore
-        self._async_streaming_completion = async_acompletion_stream_wrapper  # type: ignore
-
-    def _get_debug_message(self, messages):
-        if not messages:
-            return ''
-
-        messages = messages if isinstance(messages, list) else [messages]
-        return message_separator.join(
-            self._format_message_content(msg) for msg in messages if msg['content']
-        )
-
-    def _format_message_content(self, message):
-        content = message['content']
-        if isinstance(content, list):
-            return self._format_list_content(content)
-        return str(content)
-
-    def _format_list_content(self, content_list):
-        return '\n'.join(
-            self._format_content_element(element) for element in content_list
-        )
-
-    def _format_content_element(self, element):
-        if isinstance(element, dict):
-            if 'text' in element:
-                return element['text']
-            if (
-                self.vision_is_active()
-                and 'image_url' in element
-                and 'url' in element['image_url']
-            ):
-                return element['image_url']['url']
-        return str(element)
-
-    async def _call_acompletion(self, *args, **kwargs):
-        """This is a wrapper for the litellm acompletion function which
-        makes it mockable for testing.
-        """
-        return await litellm.acompletion(*args, **kwargs)
+        self._completion = wrapper

    @property
    def completion(self):
@@ -500,32 +215,7 @@ class LLM:

        Check the complete documentation at https://litellm.vercel.app/docs/completion
        """
-        try:
-            return self._completion
-        except Exception as e:
-            raise LLMResponseError(e)
-
-    @property
-    def async_completion(self):
-        """Decorator for the async litellm acompletion function.
-
-        Check the complete documentation at https://litellm.vercel.app/docs/providers/ollama#example-usage---streaming--acompletion
-        """
-        try:
-            return self._async_completion
-        except Exception as e:
-            raise LLMResponseError(e)
-
-    @property
-    def async_streaming_completion(self):
-        """Decorator for the async litellm acompletion function with streaming.
-
-        Check the complete documentation at https://litellm.vercel.app/docs/providers/ollama#example-usage---streaming--acompletion
-        """
-        try:
-            return self._async_streaming_completion
-        except Exception as e:
-            raise LLMResponseError(e)
+        return self._completion

    def vision_is_active(self):
        return not self.config.disable_vision and self._supports_vision()
@@ -536,38 +226,50 @@ class LLM:
        Returns:
            bool: True if model is vision capable. If model is not supported by litellm, it will return False.
        """
-        try:
-            return litellm.supports_vision(self.config.model)
-        except Exception:
-            return False
-
-    def is_caching_prompt_active(self) -> bool:
-        """Check if prompt caching is enabled and supported for current model.
-
-        Returns:
-            boolean: True if prompt caching is active for the given model.
-        """
-        return self.config.caching_prompt is True and any(
-            model in self.config.model for model in cache_prompting_supported_models
+        # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes)
+        # but model_info will have the correct value for some reason.
+        # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers
+        # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608
+        return litellm.supports_vision(self.config.model) or (
+            self.model_info is not None
+            and self.model_info.get('supports_vision', False)
        )

-    def _post_completion(self, response) -> None:
-        """Post-process the completion response."""
+    def is_caching_prompt_active(self) -> bool:
+        """Check if prompt caching is supported and enabled for current model.
+
+        Returns:
+            boolean: True if prompt caching is supported and enabled for the given model.
+        """
+        return (
+            self.config.caching_prompt is True
+            and self.model_info is not None
+            and self.model_info.get('supports_prompt_caching', False)
+            and self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
+        )
+
+    def _post_completion(self, response: ModelResponse) -> None:
+        """Post-process the completion response.
+
+        Logs the cost and usage stats of the completion call.
+        """
        try:
-            cur_cost = self.completion_cost(response)
+            cur_cost = self._completion_cost(response)
        except Exception:
            cur_cost = 0

        stats = ''
        if self.cost_metric_supported:
+            # keep track of the cost
            stats = 'Cost: %.2f USD | Accumulated Cost: %.2f USD\n' % (
                cur_cost,
                self.metrics.accumulated_cost,
            )

-        usage = response.get('usage')
+        usage: Usage | None = response.get('usage')

        if usage:
+            # keep track of the input and output tokens
            input_tokens = usage.get('prompt_tokens')
            output_tokens = usage.get('completion_tokens')

@@ -582,6 +284,7 @@ class LLM:
                    + '\n'
                )

+            # read the prompt caching status as received from the provider
            model_extra = usage.get('model_extra', {})

            cache_creation_input_tokens = model_extra.get('cache_creation_input_tokens')
@@ -598,6 +301,7 @@ class LLM:
                    'Input tokens (cache read): ' + str(cache_read_input_tokens) + '\n'
                )

+        # log the stats
        if stats:
            logger.info(stats)

@@ -616,7 +320,7 @@ class LLM:
            # TODO: this is to limit logspam in case token count is not supported
            return 0

-    def is_local(self):
+    def _is_local(self):
        """Determines if the system is using a locally running LLM.

        Returns:
@@ -631,7 +335,7 @@ class LLM:
                return True
        return False

-    def completion_cost(self, response):
+    def _completion_cost(self, response):
        """Calculate the cost of a completion response based on the model.  Local models are treated as free.
        Add the current cost into total cost in metrics.

@@ -653,10 +357,10 @@ class LLM:
                input_cost_per_token=self.config.input_cost_per_token,
                output_cost_per_token=self.config.output_cost_per_token,
            )
-            logger.debug(f'Using custom cost per token: {cost_per_token}')
+            logger.info(f'Using custom cost per token: {cost_per_token}')
            extra_kwargs['custom_cost_per_token'] = cost_per_token

-        if not self.is_local():
+        if not self._is_local():
            try:
                cost = litellm_completion_cost(
                    completion_response=response, **extra_kwargs
@@ -684,5 +388,12 @@ class LLM:

    def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
        if isinstance(messages, Message):
-            return [messages.model_dump()]
+            messages = [messages]
+
+        # set flags to know how to serialize the messages
+        for message in messages:
+            message.cache_enabled = self.is_caching_prompt_active()
+            message.vision_enabled = self.vision_is_active()
+
+        # let pydantic handle the serialization
        return [message.model_dump() for message in messages]
--- a/openhands/llm/retry_mixin.py
+++ b/openhands/llm/retry_mixin.py
@@ -0,0 +1,53 @@
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from openhands.core.exceptions import OperationCancelled
+from openhands.core.logger import openhands_logger as logger
+from openhands.runtime.utils.shutdown_listener import should_exit
+
+
+class RetryMixin:
+    """Mixin class for retry logic."""
+
+    def retry_decorator(self, **kwargs):
+        """
+        Create an LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes.
+
+        Args:
+            **kwargs: Keyword arguments to override default retry behavior.
+                      Keys: num_retries, retry_exceptions, retry_min_wait, retry_max_wait, retry_multiplier
+
+        Returns:
+            A retry decorator with the parameters customizable in configuration.
+        """
+        num_retries = kwargs.get('num_retries')
+        retry_exceptions = kwargs.get('retry_exceptions')
+        retry_min_wait = kwargs.get('retry_min_wait')
+        retry_max_wait = kwargs.get('retry_max_wait')
+        retry_multiplier = kwargs.get('retry_multiplier')
+
+        return retry(
+            before_sleep=self.log_retry_attempt,
+            stop=stop_after_attempt(num_retries),
+            reraise=True,
+            retry=(retry_if_exception_type(retry_exceptions)),
+            wait=wait_exponential(
+                multiplier=retry_multiplier,
+                min=retry_min_wait,
+                max=retry_max_wait,
+            ),
+        )
+
+    def log_retry_attempt(self, retry_state):
+        """Log retry attempts."""
+        if should_exit():
+            raise OperationCancelled('Operation cancelled.')  # exits the @retry loop
+        exception = retry_state.outcome.exception()
+        logger.error(
+            f'{exception}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
+            exc_info=False,
+        )
--- a/openhands/llm/streaming_llm.py
+++ b/openhands/llm/streaming_llm.py
@@ -0,0 +1,106 @@
+import asyncio
+from functools import partial
+from typing import Any
+
+from openhands.core.exceptions import UserCancelledError
+from openhands.core.logger import openhands_logger as logger
+from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
+
+
+class StreamingLLM(AsyncLLM):
+    """Streaming LLM class."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self._async_streaming_completion = partial(
+            self._call_acompletion,
+            model=self.config.model,
+            api_key=self.config.api_key,
+            base_url=self.config.base_url,
+            api_version=self.config.api_version,
+            custom_llm_provider=self.config.custom_llm_provider,
+            max_tokens=self.config.max_output_tokens,
+            timeout=self.config.timeout,
+            temperature=self.config.temperature,
+            top_p=self.config.top_p,
+            drop_params=self.config.drop_params,
+            stream=True,  # Ensure streaming is enabled
+        )
+
+        async_streaming_completion_unwrapped = self._async_streaming_completion
+
+        @self.retry_decorator(
+            num_retries=self.config.num_retries,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
+            retry_min_wait=self.config.retry_min_wait,
+            retry_max_wait=self.config.retry_max_wait,
+            retry_multiplier=self.config.retry_multiplier,
+        )
+        async def async_streaming_completion_wrapper(*args, **kwargs):
+            messages: list[dict[str, Any]] | dict[str, Any] = []
+
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            # see llm.py for more details
+            if len(args) > 1:
+                messages = args[1] if len(args) > 1 else args[0]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
+                messages = kwargs['messages']
+
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
+
+            # if we have no messages, something went very wrong
+            if not messages:
+                raise ValueError(
+                    'The messages list is empty. At least one message is required.'
+                )
+
+            self.log_prompt(messages)
+
+            try:
+                # Directly call and await litellm_acompletion
+                resp = await async_streaming_completion_unwrapped(*args, **kwargs)
+
+                # For streaming we iterate over the chunks
+                async for chunk in resp:
+                    # Check for cancellation before yielding the chunk
+                    if (
+                        hasattr(self.config, 'on_cancel_requested_fn')
+                        and self.config.on_cancel_requested_fn is not None
+                        and await self.config.on_cancel_requested_fn()
+                    ):
+                        raise UserCancelledError(
+                            'LLM request cancelled due to CANCELLED state'
+                        )
+                    # with streaming, it is "delta", not "message"!
+                    message_back = chunk['choices'][0]['delta'].get('content', '')
+                    if message_back:
+                        self.log_response(message_back)
+                    self._post_completion(chunk)
+
+                    yield chunk
+
+            except UserCancelledError:
+                logger.info('LLM request cancelled by user.')
+                raise
+            except Exception as e:
+                logger.error(f'Completion Error occurred:\n{e}')
+                raise
+
+            finally:
+                # sleep for 0.1 seconds to allow the stream to be flushed
+                if kwargs.get('stream', False):
+                    await asyncio.sleep(0.1)
+
+        self._async_streaming_completion = async_streaming_completion_wrapper
+
+    @property
+    def async_streaming_completion(self):
+        """Decorator for the async litellm acompletion function with streaming."""
+        return self._async_streaming_completion
--- a/openhands/memory/condenser.py
+++ b/openhands/memory/condenser.py
@@ -18,7 +18,7 @@ class MemoryCondenser:
            summary_response = resp['choices'][0]['message']['content']
            return summary_response
        except Exception as e:
-            logger.error(f'Error condensing thoughts: {e}', exc_info=False)
+            logger.error('Error condensing thoughts: %s', str(e), exc_info=False)

            # TODO If the llm fails with ContextWindowExceededError, we can try to condense the memory chunk by chunk
            raise
--- a/openhands/memory/memory.py
+++ b/openhands/memory/memory.py
@@ -161,7 +161,7 @@ class LongTermMemory:
            },
        )
        self.thought_idx += 1
-        logger.debug(f'Adding {t} event to memory: {self.thought_idx}')
+        logger.debug('Adding %s event to memory: %d', t, self.thought_idx)
        thread = threading.Thread(target=self._add_doc, args=(doc,))
        self._add_threads.append(thread)
        thread.start()  # We add the doc concurrently so we don't have to wait ~500ms for the insert
--- a/openhands/runtime/browser/browser_env.py
+++ b/openhands/runtime/browser/browser_env.py
@@ -171,7 +171,7 @@ class BrowserEnv:
            response_id, _ = self.agent_side.recv()
            if response_id == 'ALIVE':
                return True
-            logger.debug(f'Browser env is not alive. Response ID: {response_id}')
+            logger.info(f'Browser env is not alive. Response ID: {response_id}')

    def close(self):
        if not self.process.is_alive():
--- a/openhands/runtime/builder/base.py
+++ b/openhands/runtime/builder/base.py
@@ -26,12 +26,13 @@ class RuntimeBuilder(abc.ABC):
        pass

    @abc.abstractmethod
-    def image_exists(self, image_name: str) -> bool:
+    def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
        """
        Check if the runtime image exists.

        Args:
            image_name (str): The name of the runtime image (e.g., "repo:sha").
+            pull_from_repo (bool): Whether to pull from the remote repo if the image is not present locally

        Returns:
            bool: Whether the runtime image exists.
--- a/openhands/runtime/builder/docker.py
+++ b/openhands/runtime/builder/docker.py
@@ -1,7 +1,12 @@
+import datetime
+import os
+import subprocess
 import sys
+import time

 import docker

+from openhands import __version__ as oh_version
 from openhands.core.logger import openhands_logger as logger
 from openhands.runtime.builder.base import RuntimeBuilder

@@ -10,45 +15,139 @@ class DockerRuntimeBuilder(RuntimeBuilder):
    def __init__(self, docker_client: docker.DockerClient):
        self.docker_client = docker_client

-    def build(self, path: str, tags: list[str]) -> str:
+        version_info = self.docker_client.version()
+        server_version = version_info.get('Version', '')
+        if tuple(map(int, server_version.split('.'))) < (18, 9):
+            raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
+
+        self.max_lines = 10
+        self.log_lines = [''] * self.max_lines
+
+    def build(
+        self,
+        path: str,
+        tags: list[str],
+        use_local_cache: bool = False,
+        extra_build_args: list[str] | None = None,
+    ) -> str:
+        """Builds a Docker image using BuildKit and handles the build logs appropriately.
+
+        Args:
+            path (str): The path to the Docker build context.
+            tags (list[str]): A list of image tags to apply to the built image.
+            use_local_cache (bool, optional): Whether to use and update the local build cache. Defaults to False.
+            extra_build_args (list[str], optional): Additional arguments to pass to the Docker build command. Defaults to None.
+
+        Returns:
+            str: The name of the built Docker image.
+
+        Raises:
+            RuntimeError: If the Docker server version is incompatible (build failures re-raise the underlying error, e.g. subprocess.CalledProcessError).
+
+        Note:
+            This method uses Docker BuildKit for improved build performance and caching capabilities.
+            If `use_local_cache` is True, it will attempt to use and update the build cache in a local directory.
+            The `extra_build_args` parameter allows for passing additional Docker build arguments as needed.
+        """
+        self.docker_client = docker.from_env()
+        version_info = self.docker_client.version()
+        server_version = version_info.get('Version', '')
+        if tuple(map(int, server_version.split('.'))) < (18, 9):
+            raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
+
        target_image_hash_name = tags[0]
        target_image_repo, target_image_hash_tag = target_image_hash_name.split(':')
        target_image_tag = tags[1].split(':')[1] if len(tags) > 1 else None

-        try:
-            build_logs = self.docker_client.api.build(
-                path=path,
-                tag=target_image_hash_name,
-                rm=True,
-                decode=True,
-            )
-        except docker.errors.BuildError as e:
-            logger.error(f'Sandbox image build failed: {e}')
-            raise RuntimeError(f'Sandbox image build failed: {e}')
+        # Check if the image exists and pull if necessary
+        self.image_exists(target_image_repo)

-        layers: dict[str, dict[str, str]] = {}
-        previous_layer_count = 0
-        for log in build_logs:
-            if 'stream' in log:
-                logger.debug(log['stream'].strip())
-            elif 'error' in log:
-                logger.error(log['error'].strip())
-            elif 'status' in log:
-                self._output_build_progress(log, layers, previous_layer_count)
-                previous_layer_count = len(layers)
-            else:
-                logger.debug(str(log))
+        buildx_cmd = [
+            'docker',
+            'buildx',
+            'build',
+            '--progress=plain',
+            f'--build-arg=OPENHANDS_RUNTIME_VERSION={oh_version}',
+            f'--build-arg=OPENHANDS_RUNTIME_BUILD_TIME={datetime.datetime.now().isoformat()}',
+            f'--tag={target_image_hash_name}',
+            '--load',
+        ]
+
+        cache_dir = '/tmp/.buildx-cache'
+        if use_local_cache and self._is_cache_usable(cache_dir):
+            buildx_cmd.extend(
+                [
+                    f'--cache-from=type=local,src={cache_dir}',
+                    f'--cache-to=type=local,dest={cache_dir},mode=max',
+                ]
+            )
+
+        if extra_build_args:
+            buildx_cmd.extend(extra_build_args)
+
+        buildx_cmd.append(path)  # must be last!
+
+        print('================ DOCKER BUILD STARTED ================')
+        if sys.stdout.isatty():
+            sys.stdout.write('\n' * self.max_lines)
+            sys.stdout.flush()
+
+        try:
+            process = subprocess.Popen(
+                buildx_cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                universal_newlines=True,
+                bufsize=1,
+            )
+
+            if process.stdout:
+                for line in iter(process.stdout.readline, ''):
+                    line = line.strip()
+                    if line:
+                        self._output_logs(line)
+
+            return_code = process.wait()
+
+            if return_code != 0:
+                raise subprocess.CalledProcessError(
+                    return_code,
+                    process.args,
+                    output=None,
+                    stderr=None,
+                )
+
+        except subprocess.CalledProcessError as e:
+            logger.error(f'Image build failed:\n{e}')
+            logger.error(f'Command output:\n{e.output}')
+            raise
+
+        except subprocess.TimeoutExpired:
+            logger.error('Image build timed out')
+            raise
+
+        except FileNotFoundError as e:
+            logger.error(f'Python executable not found: {e}')
+            raise
+
+        except PermissionError as e:
+            logger.error(
+                f'Permission denied when trying to execute the build command:\n{e}'
+            )
+            raise
+
+        except Exception as e:
+            logger.error(f'An unexpected error occurred during the build process: {e}')
+            raise

        logger.info(f'Image [{target_image_hash_name}] build finished.')

-        assert (
-            target_image_tag
-        ), f'Expected target image tag [{target_image_tag}] is None'
-        image = self.docker_client.images.get(target_image_hash_name)
-        image.tag(target_image_repo, target_image_tag)
-        logger.info(
-            f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
-        )
+        if target_image_tag:
+            image = self.docker_client.images.get(target_image_hash_name)
+            image.tag(target_image_repo, target_image_tag)
+            logger.info(
+                f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
+            )

        # Check if the image is built successfully
        image = self.docker_client.images.get(target_image_hash_name)
@@ -67,11 +166,12 @@ class DockerRuntimeBuilder(RuntimeBuilder):
        )
        return target_image_hash_name

-    def image_exists(self, image_name: str) -> bool:
+    def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
        """Check if the image exists in the registry (try to pull it first) or in the local store.

        Args:
            image_name (str): The Docker image to check (<image repo>:<image tag>)
+            pull_from_repo (bool): Whether to pull from the remote repo if the image is not present locally
        Returns:
            bool: Whether the Docker image exists in the registry or in the local store
        """
@@ -80,14 +180,19 @@ class DockerRuntimeBuilder(RuntimeBuilder):
            return False

        try:
+            logger.debug(f'Checking, if image exists locally:\n{image_name}')
            self.docker_client.images.get(image_name)
-            logger.info(f'Image found locally:\n{image_name}')
+            logger.debug('Image found locally.')
            return True
        except docker.errors.ImageNotFound:
+            if not pull_from_repo:
+                logger.debug(
+                    f'Image {image_name} not found locally'
+                )
+                return False
            try:
-                logger.info(
-                    f'Image not found locally: {image_name}.\n'
-                    'Trying to pull it, please wait...'
+                logger.debug(
+                    'Image not found locally. Trying to pull it, please wait...'
                )

                layers: dict[str, dict[str, str]] = {}
@@ -100,7 +205,7 @@ class DockerRuntimeBuilder(RuntimeBuilder):
                logger.debug('Image pulled')
                return True
            except docker.errors.ImageNotFound:
-                logger.info('Could not find image locally or in registry.')
+                logger.debug('Could not find image locally or in registry.')
                return False
            except Exception as e:
                msg = 'Image could not be pulled: '
@@ -109,9 +214,30 @@ class DockerRuntimeBuilder(RuntimeBuilder):
                    msg += 'image not found in registry.'
                else:
                    msg += f'{ex_msg}'
-                logger.warning(msg)
+                logger.debug(msg)
                return False

+    def _output_logs(self, new_line: str) -> None:
+        """Show build output as a rolling window in the console (not for file logging).
+
+        When stdout is not a TTY the line is only forwarded to `logger.debug`
+        and nothing is drawn. Otherwise the oldest entry of `self.log_lines`
+        is dropped, `new_line` (truncated to 80 characters) is appended, and
+        the whole buffer is redrawn in place.
+
+        '\033[F'  moves the cursor up one line.
+        '\033[2K' clears the entire line.
+
+        Args:
+            new_line (str): The latest line of build output.
+        """
+        if not sys.stdout.isatty():
+            logger.debug(new_line)
+            return
+
+        # NOTE(review): assumes self.log_lines is pre-filled with
+        # self.max_lines entries elsewhere; pop(0) raises IndexError on an
+        # empty list - confirm against the initialisation code.
+        self.log_lines.pop(0)
+        self.log_lines.append(new_line[:80])
+
+        # Move the cursor up self.max_lines lines so the redraw below
+        # starts that many lines above the current cursor position.
+        sys.stdout.write('\033[F' * (self.max_lines))
+        sys.stdout.flush()
+
+        # Clear each line before rewriting it; flush after every line.
+        for line in self.log_lines:
+            sys.stdout.write('\033[2K' + line + '\n')
+            sys.stdout.flush()
+
    def _output_build_progress(
        self, current_line: dict, layers: dict, previous_layer_count: int
    ) -> None:
@@ -126,31 +252,93 @@ class DockerRuntimeBuilder(RuntimeBuilder):
            if 'progress' in current_line:
                layers[layer_id]['progress'] = current_line['progress']

-            if (
-                'total' in current_line['progressDetail']
-                and 'current' in current_line['progressDetail']
-            ):
-                total = current_line['progressDetail']['total']
-                current = current_line['progressDetail']['current']
-                percentage = (current / total) * 100
-            else:
-                percentage = 0
+            if 'progressDetail' in current_line:
+                progress_detail = current_line['progressDetail']
+                if 'total' in progress_detail and 'current' in progress_detail:
+                    total = progress_detail['total']
+                    current = progress_detail['current']
+                    percentage = min(
+                        (current / total) * 100, 100
+                    )  # Ensure it doesn't exceed 100%
+                else:
+                    percentage = (
+                        100 if layers[layer_id]['status'] == 'Download complete' else 0
+                    )

-            # refresh process bar in console if stdout is a tty
            if sys.stdout.isatty():
                sys.stdout.write('\033[F' * previous_layer_count)
                for lid, layer_data in sorted(layers.items()):
-                    sys.stdout.write('\033[K')
-                    print(
-                        f'Layer {lid}: {layer_data["progress"]} {layer_data["status"]}'
-                    )
+                    sys.stdout.write('\033[2K\r')
+                    status = layer_data['status']
+                    progress = layer_data['progress']
+                    if status == 'Download complete':
+                        print(f'Layer {lid}: Download complete')
+                    elif status == 'Already exists':
+                        print(f'Layer {lid}: Already exists')
+                    else:
+                        print(f'Layer {lid}: {progress} {status}')
                sys.stdout.flush()
-            # otherwise Log only if percentage is at least 10% higher than last logged
-            elif percentage != 0 and percentage - layers[layer_id]['last_logged'] >= 10:
-                logger.info(
+            elif percentage != 0 and (
+                percentage - layers[layer_id]['last_logged'] >= 10 or percentage == 100
+            ):
+                logger.debug(
                    f'Layer {layer_id}: {layers[layer_id]["progress"]} {layers[layer_id]["status"]}'
                )

            layers[layer_id]['last_logged'] = percentage
        elif 'status' in current_line:
-            logger.info(current_line['status'])
+            logger.debug(current_line['status'])
+
+    def _prune_old_cache_files(self, cache_dir: str, max_age_days: int = 7) -> None:
+        """
+        Prune cache files older than the specified number of days.
+
+        The directory tree under `cache_dir` is walked with `os.walk` and a
+        file's age is taken from its modification time. Only files are
+        removed; directories are left in place. Errors are logged as
+        warnings and are not re-raised.
+
+        Args:
+            cache_dir (str): The path to the cache directory.
+            max_age_days (int): The maximum age of cache files in days.
+        """
+        try:
+            current_time = time.time()
+            max_age_seconds = max_age_days * 24 * 60 * 60
+
+            for root, _, files in os.walk(cache_dir):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    # Per-file try/except: a failure on one file is logged
+                    # and the loop continues with the next file.
+                    try:
+                        file_age = current_time - os.path.getmtime(file_path)
+                        if file_age > max_age_seconds:
+                            os.remove(file_path)
+                            logger.debug(f'Removed old cache file: {file_path}')
+                    except Exception as e:
+                        logger.warning(f'Error processing cache file {file_path}: {e}')
+        except Exception as e:
+            # Catches anything raised outside the per-file handler above.
+            logger.warning(f'Error during build cache pruning: {e}')
+
+    def _is_cache_usable(self, cache_dir: str) -> bool:
+        """
+        Check if the cache directory is usable (exists and is writable).
+
+        The directory is created if it does not exist yet. As a side effect,
+        when the directory is usable, old cache files are pruned via
+        `_prune_old_cache_files` using its default maximum age.
+
+        Args:
+            cache_dir (str): The path to the cache directory.
+
+        Returns:
+            bool: True if the cache directory is usable, False otherwise.
+        """
+        if not os.path.exists(cache_dir):
+            try:
+                os.makedirs(cache_dir, exist_ok=True)
+                logger.debug(f'Created cache directory: {cache_dir}')
+            except OSError as e:
+                # Creation failure is reported at debug level only and
+                # makes this method return False.
+                logger.debug(f'Failed to create cache directory {cache_dir}: {e}')
+                return False
+
+        if not os.access(cache_dir, os.W_OK):
+            logger.warning(
+                f'Cache directory {cache_dir} is not writable. Caches will not be used for Docker builds.'
+            )
+            return False
+
+        # Housekeeping runs on every successful check.
+        self._prune_old_cache_files(cache_dir)
+
+        logger.debug(f'Cache directory {cache_dir} is usable')
+        return True
--- a/openhands/runtime/builder/remote.py
+++ b/openhands/runtime/builder/remote.py
@@ -98,7 +98,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
            # Wait before polling again
            sleep_if_should_continue(30)

-    def image_exists(self, image_name: str) -> bool:
+    def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
        """Checks if an image exists in the remote registry using the /image_exists endpoint."""
        params = {'image': image_name}
        response = send_request(
--- a/openhands/runtime/client/client.py
+++ b/openhands/runtime/client/client.py
@@ -11,6 +11,7 @@ import os
 import re
 import shutil
 import subprocess
+import time
 from contextlib import asynccontextmanager
 from pathlib import Path

@@ -86,6 +87,8 @@ class RuntimeClient:
        self.lock = asyncio.Lock()
        self.plugins: dict[str, Plugin] = {}
        self.browser = BrowserEnv(browsergym_eval_env)
+        self.start_time = time.time()
+        self.last_execution_time = self.start_time

    @property
    def initial_pwd(self):
@@ -95,7 +98,7 @@ class RuntimeClient:
        for plugin in self.plugins_to_load:
            await plugin.initialize(self.username)
            self.plugins[plugin.name] = plugin
-            logger.debug(f'Initializing plugin: {plugin.name}')
+            logger.info(f'Initializing plugin: {plugin.name}')

            if isinstance(plugin, JupyterPlugin):
                await self.run_ipython(
@@ -111,7 +114,7 @@ class RuntimeClient:
                    code='from openhands.runtime.plugins.agent_skills.agentskills import *\n'
                )
            )
-            logger.debug(f'AgentSkills initialized: {obs}')
+            logger.info(f'AgentSkills initialized: {obs}')

        await self._init_bash_commands()
        logger.info('Runtime client initialized.')
@@ -136,7 +139,7 @@ class RuntimeClient:
        """

        # First create the working directory, independent of the user
-        logger.debug(f'Client working directory: {self.initial_pwd}')
+        logger.info(f'Client working directory: {self.initial_pwd}')
        command = f'umask 002; mkdir -p {self.initial_pwd}'
        output = subprocess.run(command, shell=True, capture_output=True)
        out_str = output.stdout.decode()
@@ -237,7 +240,7 @@ class RuntimeClient:
        self.shell.expect(self.__bash_expect_regex)

    async def _init_bash_commands(self):
-        logger.debug(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
+        logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
        for command in INIT_COMMANDS:
            action = CmdRunAction(command=command)
            action.timeout = 300
@@ -248,7 +251,7 @@ class RuntimeClient:
            )
            assert obs.exit_code == 0

-        logger.debug('Bash init commands completed')
+        logger.info('Bash init commands completed')

    def _get_bash_prompt_and_update_pwd(self):
        ps1 = self.shell.after
@@ -320,7 +323,13 @@ class RuntimeClient:
            logger.debug('Requesting exit code...')
            self.shell.expect(self.__bash_expect_regex, timeout=timeout)
            _exit_code_output = self.shell.before
-            exit_code = int(_exit_code_output.strip().split()[0])
+            try:
+                exit_code = int(_exit_code_output.strip().split()[0])
+            except:
+                logger.error('Error getting exit code from bash script')
+                # If we try to run an invalid shell script the output sometimes includes error text
+                # rather than the error code - we assume this is an error
+                exit_code = 2

        except pexpect.TIMEOUT as e:
            if kill_on_timeout:
@@ -600,6 +609,14 @@ if __name__ == '__main__':
            response = await call_next(request)
        return response

+    @app.get('/server_info')
+    async def get_server_info():
+        """Report how long the runtime client has been up and idle.
+
+        Returns:
+            dict: `uptime` is the time elapsed since `client.start_time` and
+                `idle_time` the time elapsed since
+                `client.last_execution_time`, both as `time.time()`
+                differences (seconds).
+        """
+        assert client is not None
+        current_time = time.time()
+        uptime = current_time - client.start_time
+        idle_time = current_time - client.last_execution_time
+        return {'uptime': uptime, 'idle_time': idle_time}
+
    @app.post('/execute_action')
    async def execute_action(action_request: ActionRequest):
        assert client is not None
@@ -607,10 +624,11 @@ if __name__ == '__main__':
            action = event_from_dict(action_request.action)
            if not isinstance(action, Action):
                raise HTTPException(status_code=400, detail='Invalid action type')
+            client.last_execution_time = time.time()
            observation = await client.run_action(action)
            return event_to_dict(observation)
        except Exception as e:
-            logger.error(f'Error processing command: {str(e)}')
+            logger.error(f'Error processing command: {str(e)}', exc_info=True, stack_info=True)
            raise HTTPException(status_code=500, detail=str(e))

    @app.post('/upload_file')
@@ -753,8 +771,7 @@ if __name__ == '__main__':
            logger.error(f'Error listing files: {e}', exc_info=True)
            return []

-    logger.info(
-        f'Runtime client initialized.'
-        f'Starting action execution API on port {args.port}'
-    )
+    logger.info('Runtime client initialized.')
+
+    logger.info(f'Starting action execution API on port {args.port}')
    run(app, host='0.0.0.0', port=args.port)
--- a/openhands/runtime/client/runtime.py
+++ b/openhands/runtime/client/runtime.py
@@ -10,6 +10,7 @@ import requests
 import tenacity

 from openhands.core.config import AppConfig
+from openhands.core.logger import DEBUG
 from openhands.core.logger import openhands_logger as logger
 from openhands.events import EventStream
 from openhands.events.action import (
@@ -125,9 +126,7 @@ class EventStreamRuntime(Runtime):
        self.config = config
        self._host_port = 30000  # initial dummy value
        self._container_port = 30001  # initial dummy value
-        self.api_url = (
-            f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
-        )
+        self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
        self.session = requests.Session()
        self.instance_id = (
            sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
@@ -153,11 +152,9 @@ class EventStreamRuntime(Runtime):
                f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
            )

-        # container logs can be skipped by setting the SKIP_CONTAINER_LOGS env var to true or 1
-        self.skip_container_logs = os.getenv(
-            'SKIP_CONTAINER_LOGS', 'false'
-        ).lower() in ['true', '1']
-
+        self.skip_container_logs = (
+            os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true'
+        )
        if self.runtime_container_image is None:
            if self.base_container_image is None:
                raise ValueError(
@@ -227,7 +224,7 @@ class EventStreamRuntime(Runtime):
                self._host_port
            )  # in future this might differ from host port
            self.api_url = (
-                f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
+                f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
            )

            use_host_network = self.config.sandbox.use_host_network
@@ -250,7 +247,7 @@ class EventStreamRuntime(Runtime):
                'port': str(self._container_port),
                'PYTHONUNBUFFERED': 1,
            }
-            if self.config.debug:
+            if self.config.debug or DEBUG:
                environment['DEBUG'] = 'true'

            logger.debug(f'Workspace Base: {self.config.workspace_base}')
@@ -293,7 +290,7 @@ class EventStreamRuntime(Runtime):
                volumes=volumes,
            )
            self.log_buffer = LogBuffer(container)
-            logger.debug(f'Container started. Server url: {self.api_url}')
+            logger.info(f'Container started. Server url: {self.api_url}')
            self.send_status_message('STATUS$CONTAINER_STARTED')
            return container
        except Exception as e:
@@ -510,7 +507,7 @@ class EventStreamRuntime(Runtime):
        finally:
            if recursive:
                os.unlink(temp_zip_path)
-            logger.debug(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
+            logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
            self._refresh_logs()

    def list_files(self, path: str | None = None) -> list[str]:
--- a/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py
+++ b/openhands/runtime/plugins/agent_skills/file_ops/file_ops.py
@@ -22,10 +22,7 @@ import shutil
 import tempfile
 import uuid

-if __package__ is None or __package__ == '':
-    from aider import Linter
-else:
-    from openhands.runtime.plugins.agent_skills.utils.aider import Linter
+from openhands.linter import DefaultLinter, LintResult

 CURRENT_FILE: str | None = None
 CURRENT_LINE = 1
@@ -98,13 +95,16 @@ def _lint_file(file_path: str) -> tuple[str | None, int | None]:
    Returns:
        tuple[str | None, int | None]: (lint_error, first_error_line_number)
    """
-    linter = Linter(root=os.getcwd())
-    lint_error = linter.lint(file_path)
+    linter = DefaultLinter()
+    lint_error: list[LintResult] = linter.lint(file_path)
    if not lint_error:
        # Linting successful. No issues found.
        return None, None
-    first_error_line = lint_error.lines[0] if lint_error.lines else None
-    return 'ERRORS:\n' + lint_error.text, first_error_line
+    first_error_line = lint_error[0].line if len(lint_error) > 0 else None
+    error_text = 'ERRORS:\n' + '\n'.join(
+        [f'{file_path}:{err.line}:{err.column}: {err.message}' for err in lint_error]
+    )
+    return error_text, first_error_line


 def _print_window(
@@ -518,7 +518,8 @@ def _edit_file_impl(
            with open(original_file_backup_path, 'w') as f:
                f.writelines(lines)

-            lint_error, first_error_line = _lint_file(file_name)
+            file_name_abs = os.path.abspath(file_name)
+            lint_error, first_error_line = _lint_file(file_name_abs)

            # Select the errors caused by the modification
            def extract_last_part(line):
@@ -786,7 +787,6 @@ def append_file(file_name: str, content: str) -> None:

    Args:
        file_name: str: The name of the file to edit.
-        line_number: int: The line number (starting from 1) to insert the content after.
        content: str: The content to insert.
    """
    ret_str = _edit_file_impl(
--- a/openhands/runtime/plugins/agent_skills/utils/aider/LICENSE.txt
+++ b/openhands/runtime/plugins/agent_skills/utils/aider/LICENSE.txt
@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/openhands/runtime/plugins/agent_skills/utils/aider/README.md
+++ b/openhands/runtime/plugins/agent_skills/utils/aider/README.md
@@ -1,8 +0,0 @@
-# Aider is AI pair programming in your terminal
-
-Aider lets you pair program with LLMs,
-to edit code in your local git repository.
-
-Please see the [original repository](https://github.com/paul-gauthier/aider) for more information.
-
-OpenHands has adapted and integrated its linter module ([original code](https://github.com/paul-gauthier/aider/blob/main/aider/linter.py)).
--- a/openhands/runtime/plugins/agent_skills/utils/aider/init.py
+++ b/openhands/runtime/plugins/agent_skills/utils/aider/init.py
@@ -1,9 +0,0 @@
-if __package__ is None or __package__ == '':
-    from linter import Linter, LintResult
-else:
-    from openhands.runtime.plugins.agent_skills.utils.aider.linter import (
-        Linter,
-        LintResult,
-    )
-
-__all__ = ['Linter', 'LintResult']
--- a/openhands/runtime/plugins/agent_skills/utils/aider/linter.py
+++ b/openhands/runtime/plugins/agent_skills/utils/aider/linter.py
@@ -1,378 +0,0 @@
-import json
-import os
-import subprocess
-import sys
-import tempfile
-import traceback
-import warnings
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Optional
-
-from grep_ast import TreeContext, filename_to_lang
-from tree_sitter_languages import get_parser  # noqa: E402
-
-# tree_sitter is throwing a FutureWarning
-warnings.simplefilter('ignore', category=FutureWarning)
-
-
-@dataclass
-class LintResult:
-    text: str
-    lines: list
-
-
-class Linter:
-    def __init__(self, encoding='utf-8', root=None):
-        self.encoding = encoding
-        self.root = root
-
-        self.ts_installed = self._check_tool_installed('tsc')
-        self.eslint_installed = self._check_tool_installed('eslint')
-
-        self.languages = dict(
-            python=self.py_lint,
-        )
-        if self.eslint_installed:
-            self.languages['javascript'] = self.ts_eslint
-            self.languages['typescript'] = self.ts_eslint
-        elif self.ts_installed:
-            self.languages['javascript'] = self.ts_tsc_lint
-            self.languages['typescript'] = self.ts_tsc_lint
-        self.all_lint_cmd = None
-
-    def set_linter(self, lang, cmd):
-        if lang:
-            self.languages[lang] = cmd
-            return
-
-        self.all_lint_cmd = cmd
-
-    def get_rel_fname(self, fname):
-        if self.root:
-            return os.path.relpath(fname, self.root)
-        else:
-            return fname
-
-    def run_cmd(self, cmd, rel_fname, code):
-        cmd += ' ' + rel_fname
-        cmd = cmd.split()
-
-        process = subprocess.Popen(
-            cmd,
-            cwd=self.root,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            stdin=subprocess.PIPE,  # Add stdin parameter
-        )
-        stdout, _ = process.communicate(
-            input=code.encode()
-        )  # Pass the code to the process
-        errors = stdout.decode().strip()
-        self.returncode = process.returncode
-        if self.returncode == 0:
-            return  # zero exit status
-
-        cmd = ' '.join(cmd)
-        res = ''
-        res += errors
-        line_num = extract_error_line_from(res)
-        return LintResult(text=res, lines=[line_num])
-
-    def get_abs_fname(self, fname):
-        if os.path.isabs(fname):
-            return fname
-        elif os.path.isfile(fname):
-            rel_fname = self.get_rel_fname(fname)
-            return os.path.abspath(rel_fname)
-        else:  # if a temp file
-            return self.get_rel_fname(fname)
-
-    def lint(self, fname, cmd=None) -> LintResult | None:
-        code = Path(fname).read_text(self.encoding)
-        absolute_fname = self.get_abs_fname(fname)
-        if cmd:
-            cmd = cmd.strip()
-        if not cmd:
-            lang = filename_to_lang(fname)
-            if not lang:
-                return None
-            if self.all_lint_cmd:
-                cmd = self.all_lint_cmd
-            else:
-                cmd = self.languages.get(lang)
-        if callable(cmd):
-            linkres = cmd(fname, absolute_fname, code)
-        elif cmd:
-            linkres = self.run_cmd(cmd, absolute_fname, code)
-        else:
-            linkres = basic_lint(absolute_fname, code)
-        return linkres
-
-    def flake_lint(self, rel_fname, code):
-        fatal = 'F821,F822,F831,E112,E113,E999,E902'
-        flake8 = f'flake8 --select={fatal} --isolated'
-
-        try:
-            flake_res = self.run_cmd(flake8, rel_fname, code)
-        except FileNotFoundError:
-            flake_res = None
-        return flake_res
-
-    def py_lint(self, fname, rel_fname, code):
-        error = self.flake_lint(rel_fname, code)
-        if not error:
-            error = lint_python_compile(fname, code)
-        if not error:
-            error = basic_lint(rel_fname, code)
-        return error
-
-    def _check_tool_installed(self, tool_name: str) -> bool:
-        """Check if a tool is installed."""
-        try:
-            subprocess.run(
-                [tool_name, '--version'],
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            )
-            return True
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            return False
-
-    def print_lint_result(self, lint_result: LintResult) -> None:
-        print(f'\n{lint_result.text.strip()}')
-        if isinstance(lint_result.lines, list) and lint_result.lines:
-            if isinstance(lint_result.lines[0], LintResult):
-                self.print_lint_result(lint_result.lines[0])
-
-    def ts_eslint(self, fname: str, rel_fname: str, code: str) -> Optional[LintResult]:
-        """Use ESLint to check for errors. If ESLint is not installed return None."""
-        if not self.eslint_installed:
-            return None
-
-        # Enhanced ESLint configuration with React support
-        eslint_config = {
-            'env': {'es6': True, 'browser': True, 'node': True},
-            'extends': ['eslint:recommended', 'plugin:react/recommended'],
-            'parserOptions': {
-                'ecmaVersion': 2021,
-                'sourceType': 'module',
-                'ecmaFeatures': {'jsx': True},
-            },
-            'plugins': ['react'],
-            'rules': {
-                'no-unused-vars': 'warn',
-                'no-console': 'off',
-                'react/prop-types': 'warn',
-                'semi': ['error', 'always'],
-            },
-            'settings': {'react': {'version': 'detect'}},
-        }
-
-        # Write config to a temporary file
-        with tempfile.NamedTemporaryFile(
-            mode='w', suffix='.json', delete=False
-        ) as temp_config:
-            json.dump(eslint_config, temp_config)
-            temp_config_path = temp_config.name
-
-        try:
-            # Point to frontend node_modules directory
-            if self.root:
-                plugin_path = f'{self.root}/frontend/node_modules/'
-            else:
-                return None
-
-            eslint_cmd = f'eslint --no-eslintrc --config {temp_config_path} --resolve-plugins-relative-to {plugin_path} --format json'
-            eslint_res = ''
-            try:
-                eslint_res = self.run_cmd(eslint_cmd, rel_fname, code)
-                if eslint_res and hasattr(eslint_res, 'text'):
-                    # Parse the ESLint JSON output
-                    eslint_output = json.loads(eslint_res.text)
-                    error_lines = []
-                    error_messages = []
-                    for result in eslint_output:
-                        for message in result.get('messages', []):
-                            line = message.get('line', 0)
-                            error_lines.append(line)
-                            error_messages.append(
-                                f"{rel_fname}:{line}:{message.get('column', 0)}: {message.get('message')} ({message.get('ruleId')})"
-                            )
-                    if not error_messages:
-                        return None
-
-                    return LintResult(text='\n'.join(error_messages), lines=error_lines)
-            except json.JSONDecodeError as e:
-                return LintResult(text=f'\nJSONDecodeError: {e}', lines=[eslint_res])
-            except FileNotFoundError:
-                return None
-            except Exception as e:
-                return LintResult(text=f'\nUnexpected error: {e}', lines=[])
-        finally:
-            os.unlink(temp_config_path)
-        return None
-
-    def ts_tsc_lint(self, fname, rel_fname, code):
-        """Use typescript compiler to check for errors. If TypeScript is not installed return None."""
-        if self.ts_installed:
-            tsc_cmd = 'tsc --noEmit --allowJs --checkJs --strict --noImplicitAny --strictNullChecks --strictFunctionTypes --strictBindCallApply --strictPropertyInitialization --noImplicitThis --alwaysStrict'
-            try:
-                tsc_res = self.run_cmd(tsc_cmd, rel_fname, code)
-                if tsc_res:
-                    # Parse the TSC output
-                    error_lines = []
-                    for line in tsc_res.text.split('\n'):
-                        # Extract lines and column numbers
-                        if ': error TS' in line or ': warning TS' in line:
-                            try:
-                                location_part = line.split('(')[1].split(')')[0]
-                                line_num, _ = map(int, location_part.split(','))
-                                error_lines.append(line_num)
-                            except (IndexError, ValueError):
-                                continue
-                    return LintResult(text=tsc_res.text, lines=error_lines)
-            except FileNotFoundError:
-                pass
-
-        # If still no errors, check for missing semicolons
-        lines = code.split('\n')
-        error_lines = []
-        for i, line in enumerate(lines):
-            stripped_line = line.strip()
-            if (
-                stripped_line
-                and not stripped_line.endswith(';')
-                and not stripped_line.endswith('{')
-                and not stripped_line.endswith('}')
-                and not stripped_line.startswith('//')
-            ):
-                error_lines.append(i + 1)
-
-        if error_lines:
-            error_message = (
-                f"{rel_fname}({error_lines[0]},1): error TS1005: ';' expected."
-            )
-            return LintResult(text=error_message, lines=error_lines)
-
-        # If tsc is not available return None (basic_lint causes other problems!)
-        return None
-
-
-def lint_python_compile(fname, code):
-    try:
-        compile(code, fname, 'exec')  # USE TRACEBACK BELOW HERE
-        return
-    except IndentationError as err:
-        end_lineno = getattr(err, 'end_lineno', err.lineno)
-        if isinstance(end_lineno, int):
-            line_numbers = list(range(end_lineno - 1, end_lineno))
-        else:
-            line_numbers = []
-
-        tb_lines = traceback.format_exception(type(err), err, err.__traceback__)
-        last_file_i = 0
-
-        target = '# USE TRACEBACK'
-        target += ' BELOW HERE'
-        for i in range(len(tb_lines)):
-            if target in tb_lines[i]:
-                last_file_i = i
-                break
-        tb_lines = tb_lines[:1] + tb_lines[last_file_i + 1 :]
-
-    res = ''.join(tb_lines)
-    return LintResult(text=res, lines=line_numbers)
-
-
-def basic_lint(fname, code):
-    """Use tree-sitter to look for syntax errors, display them with tree context."""
-    lang = filename_to_lang(fname)
-    if not lang:
-        return
-
-    parser = get_parser(lang)
-    tree = parser.parse(bytes(code, 'utf-8'))
-
-    errors = traverse_tree(tree.root_node)
-    if not errors:
-        return
-
-    error_messages = [
-        f'{fname}:{line}:{col}: {error_details}' for line, col, error_details in errors
-    ]
-    return LintResult(
-        text='\n'.join(error_messages), lines=[line for line, _, _ in errors]
-    )
-
-
-def extract_error_line_from(lint_error):
-    # TODO: this is a temporary fix to extract the error line from the error message
-    # it should be replaced with a more robust/unified solution
-    first_error_line = None
-    for line in lint_error.splitlines(True):
-        if line.strip():
-            # The format of the error message is: <filename>:<line>:<column>: <error code> <error message>
-            parts = line.split(':')
-            if len(parts) >= 2:
-                try:
-                    first_error_line = int(parts[1])
-                    break
-                except ValueError:
-                    continue
-    return first_error_line
-
-
-def tree_context(fname, code, line_nums):
-    context = TreeContext(
-        fname,
-        code,
-        color=False,
-        line_number=True,
-        child_context=False,
-        last_line=False,
-        margin=0,
-        mark_lois=True,
-        loi_pad=3,
-        # header_max=30,
-        show_top_of_file_parent_scope=False,
-    )
-    line_nums = set(line_nums)
-    context.add_lines_of_interest(line_nums)
-    context.add_context()
-    output = context.format()
-
-    return output
-
-
-def traverse_tree(node):
-    """Traverses the tree to find errors"""
-    errors = []
-    if node.type == 'ERROR' or node.is_missing:
-        line_no = node.start_point[0] + 1
-        col_no = node.start_point[1] + 1
-        error_type = 'Missing node' if node.is_missing else 'Syntax error'
-        errors.append((line_no, col_no, error_type))
-
-    for child in node.children:
-        errors += traverse_tree(child)
-
-    return errors
-
-
-def main():
-    """Main function to parse files provided as command line arguments."""
-    if len(sys.argv) < 2:
-        print('Usage: python linter.py <file1> <file2> ...')
-        sys.exit(1)
-
-    linter = Linter(root=os.getcwd())
-    for file_path in sys.argv[1:]:
-        errors = linter.lint(file_path)
-        if errors:
-            print(errors)
-
-
-if __name__ == '__main__':
-    main()
--- a/openhands/runtime/remote/runtime.py
+++ b/openhands/runtime/remote/runtime.py
@@ -59,13 +59,6 @@ class RemoteRuntime(Runtime):
        status_message_callback: Optional[Callable] = None,
    ):
        self.config = config
-        if self.config.sandbox.api_hostname == 'localhost':
-            self.config.sandbox.api_hostname = 'api.all-hands.dev/v0/runtime'
-            logger.warning(
-                'Using localhost as the API hostname is not supported in the RemoteRuntime. Please set a proper hostname.\n'
-                'Setting it to default value: api.all-hands.dev/v0/runtime'
-            )
-        self.api_url = f'https://{self.config.sandbox.api_hostname.rstrip("/")}'

        if self.config.sandbox.api_key is None:
            raise ValueError(
@@ -82,7 +75,7 @@ class RemoteRuntime(Runtime):
            )

        self.runtime_builder = RemoteRuntimeBuilder(
-            self.api_url, self.config.sandbox.api_key
+            self.config.sandbox.remote_runtime_api_url, self.config.sandbox.api_key
        )
        self.runtime_id: str | None = None
        self.runtime_url: str | None = None
@@ -97,7 +90,11 @@ class RemoteRuntime(Runtime):
        self.container_image: str = self.config.sandbox.base_container_image
        self.container_name = 'oh-remote-runtime-' + self.instance_id
        logger.debug(f'RemoteRuntime `{sid}` config:\n{self.config}')
-        response = send_request(self.session, 'GET', f'{self.api_url}/registry_prefix')
+        response = send_request(
+            self.session,
+            'GET',
+            f'{self.config.sandbox.remote_runtime_api_url}/registry_prefix',
+        )
        response_json = response.json()
        registry_prefix = response_json['registry_prefix']
        os.environ['OH_RUNTIME_RUNTIME_IMAGE_REPO'] = (
@@ -123,7 +120,7 @@ class RemoteRuntime(Runtime):
        response = send_request(
            self.session,
            'GET',
-            f'{self.api_url}/image_exists',
+            f'{self.config.sandbox.remote_runtime_api_url}/image_exists',
            params={'image': self.container_image},
        )
        if response.status_code != 200 or not response.json()['exists']:
@@ -157,7 +154,10 @@ class RemoteRuntime(Runtime):

        # Start the sandbox using the /start endpoint
        response = send_request(
-            self.session, 'POST', f'{self.api_url}/start', json=start_request
+            self.session,
+            'POST',
+            f'{self.config.sandbox.remote_runtime_api_url}/start',
+            json=start_request,
        )
        if response.status_code != 201:
            raise RuntimeError(f'Failed to start sandbox: {response.text}')
@@ -215,7 +215,7 @@ class RemoteRuntime(Runtime):
                response = send_request(
                    self.session,
                    'POST',
-                    f'{self.api_url}/stop',
+                    f'{self.config.sandbox.remote_runtime_api_url}/stop',
                    json={'runtime_id': self.runtime_id},
                )
                if response.status_code != 200:
@@ -246,7 +246,7 @@ class RemoteRuntime(Runtime):
            assert action.timeout is not None

            try:
-                logger.debug('Executing action')
+                logger.info('Executing action')
                request_body = {'action': event_to_dict(action)}
                logger.debug(f'Request body: {request_body}')
                response = send_request(
@@ -338,7 +338,7 @@ class RemoteRuntime(Runtime):
                ),
            )
            if response.status_code == 200:
-                logger.debug(
+                logger.info(
                    f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {response.text}'
                )
                return
@@ -352,7 +352,7 @@ class RemoteRuntime(Runtime):
        finally:
            if recursive:
                os.unlink(temp_zip_path)
-            logger.debug(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
+            logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')

    def list_files(self, path: str | None = None) -> list[str]:
        self._wait_until_alive()
--- a/openhands/runtime/runtime.py
+++ b/openhands/runtime/runtime.py
@@ -92,7 +92,7 @@ class Runtime:
                code += f'os.environ["{key}"] = {json.dumps(value)}\n'
            code += '\n'
            obs = self.run_ipython(IPythonRunCellAction(code))
-            logger.debug(f'Added env vars to IPython: code={code}, obs={obs}')
+            logger.info(f'Added env vars to IPython: code={code}, obs={obs}')

        # Add env vars to the Bash shell
        cmd = ''
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@@ -6,11 +6,11 @@ import subprocess
 import tempfile

 import docker
-import toml
 from dirhash import dirhash
 from jinja2 import Environment, FileSystemLoader

 import openhands
+from openhands import __version__ as oh_version
 from openhands.core.logger import openhands_logger as logger
 from openhands.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder

@@ -19,19 +19,6 @@ def get_runtime_image_repo():
    return os.getenv('OH_RUNTIME_RUNTIME_IMAGE_REPO', 'ghcr.io/all-hands-ai/runtime')


-def _get_package_version():
-    """Read the version from pyproject.toml.
-
-    Returns:
-    - The version specified in pyproject.toml under [tool.poetry]
-    """
-    project_root = os.path.dirname(os.path.dirname(os.path.abspath(openhands.__file__)))
-    pyproject_path = os.path.join(project_root, 'pyproject.toml')
-    with open(pyproject_path, 'r') as f:
-        pyproject_data = toml.load(f)
-    return pyproject_data['tool']['poetry']['version']
-
-
 def _put_source_code_to_dir(temp_dir: str):
    """Builds the project source tarball directly in temp_dir and unpacks it.
    The OpenHands source code ends up in the temp_dir/code directory.
@@ -43,10 +30,10 @@ def _put_source_code_to_dir(temp_dir: str):
        raise RuntimeError(f'Temp directory {temp_dir} does not exist')

    project_root = os.path.dirname(os.path.dirname(os.path.abspath(openhands.__file__)))
-    logger.debug(f'Building source distribution using project root: {project_root}')
+    logger.info(f'Building source distribution using project root: {project_root}')

    # Fetch the correct version from pyproject.toml
-    package_version = _get_package_version()
+    package_version = oh_version
    tarball_filename = f'openhands_ai-{package_version}.tar.gz'
    tarball_path = os.path.join(temp_dir, tarball_filename)

@@ -60,7 +47,7 @@ def _put_source_code_to_dir(temp_dir: str):
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
-    logger.debug(result.stdout.decode())
+    logger.info(result.stdout.decode())
    err_logs = result.stderr.decode()
    if err_logs:
        logger.error(err_logs)
@@ -72,7 +59,7 @@ def _put_source_code_to_dir(temp_dir: str):
    if not os.path.exists(tarball_path):
        logger.error(f'Source distribution not found at {tarball_path}')
        raise RuntimeError(f'Source distribution not found at {tarball_path}')
-    logger.debug(f'Source distribution created at {tarball_path}')
+    logger.info(f'Source distribution created at {tarball_path}')

    # Unzip the tarball
    shutil.unpack_archive(tarball_path, temp_dir)
@@ -83,7 +70,7 @@ def _put_source_code_to_dir(temp_dir: str):
        os.path.join(temp_dir, f'openhands_ai-{package_version}'),
        os.path.join(temp_dir, 'code'),
    )
-    logger.debug(f'Unpacked source code directory: {os.path.join(temp_dir, "code")}')
+    logger.info(f'Unpacked source code directory: {os.path.join(temp_dir, "code")}')


 def _generate_dockerfile(
@@ -142,9 +129,7 @@ def prep_docker_build_folder(
        skip_init=skip_init,
        extra_deps=extra_deps,
    )
-
-    # Write or skip container logs
-    if os.getenv('SKIP_CONTAINER_LOGS', 'false').lower() not in ['true', '1']:
+    if os.getenv('SKIP_CONTAINER_LOGS', 'false') != 'true':
        logger.debug(
            (
                f'===== Dockerfile content start =====\n'
@@ -156,14 +141,23 @@ def prep_docker_build_folder(
        file.write(dockerfile_content)

    # Get the MD5 hash of the dir_path directory
-    dist_hash = dirhash(dir_path, 'md5')
-    logger.debug(
+    dir_hash = dirhash(
+        dir_path,
+        'md5',
+        ignore=[
+            '.*/',  # hidden directories
+            '__pycache__/',
+            '*.pyc',
+        ],
+    )
+    hash = f'v{oh_version}_{dir_hash}'
+    logger.info(
        f'Input base image: {base_image}\n'
        f'Skip init: {skip_init}\n'
        f'Extra deps: {extra_deps}\n'
-        f'Hash for docker build directory [{dir_path}] (contents: {os.listdir(dir_path)}): {dist_hash}\n'
+        f'Hash for docker build directory [{dir_path}] (contents: {os.listdir(dir_path)}): {hash}\n'
    )
-    return dist_hash
+    return hash


 def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
@@ -190,7 +184,6 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
        if ':' not in base_image:
            base_image = base_image + ':latest'
        [repo, tag] = base_image.split(':')
-        oh_version = _get_package_version()

        # Hash the repo if it's too long
        if len(repo) > 32:
@@ -258,7 +251,7 @@ def build_runtime_image(

    # Scenario 1: If we already have an image with the exact same hash, then it means the image is already built
    # with the exact same source code and Dockerfile, so we will reuse it. Building it is not required.
-    if not force_rebuild and runtime_builder.image_exists(hash_runtime_image_name):
+    if not force_rebuild and runtime_builder.image_exists(hash_runtime_image_name, False):
        logger.info(
            f'Image [{hash_runtime_image_name}] already exists so we will reuse it.'
        )
@@ -372,14 +365,20 @@ def _build_sandbox_image(
    target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}'
    target_image_generic_name = f'{target_image_repo}:{target_image_tag}'

+    tags_to_add = [target_image_hash_name]
+
+    # Only add the generic tag if the image does not exist
+    # so it does not get overwritten & only points to the earliest version
+    # to avoid "too many layers" after many re-builds
+    if not runtime_builder.image_exists(target_image_generic_name):
+        tags_to_add.append(target_image_generic_name)
+
    try:
-        image_name = runtime_builder.build(
-            path=docker_folder, tags=[target_image_hash_name, target_image_generic_name]
-        )
+        image_name = runtime_builder.build(path=docker_folder, tags=tags_to_add)
        if not image_name:
            raise RuntimeError(f'Build failed for image {target_image_hash_name}')
    except Exception as e:
-        logger.error(f'Sandbox image build failed: {e}')
+        logger.error(f'Sandbox image build failed: {str(e)}')
        raise

    return image_name
@@ -453,7 +452,7 @@ if __name__ == '__main__':
    else:
        # If a build_folder is not provided, after copying the required source code and dynamically creating the
        # Dockerfile, we actually build the Docker image
-        logger.debug('Building image in a temporary folder')
+        logger.info('Building image in a temporary folder')
        docker_builder = DockerRuntimeBuilder(docker.from_env())
        image_name = build_runtime_image(args.base_image, docker_builder)
        print(f'\nBUILT Image: {image_name}\n')
--- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2
+++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2
@@ -69,8 +69,7 @@ RUN \
    /openhands/miniforge3/bin/mamba run -n base poetry run pip install playwright && \
    /openhands/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \
    # Set environment variables
-    export OH_INTERPRETER_PATH=$(/openhands/miniforge3/bin/mamba run -n base poetry run python -c "import sys; print(sys.executable)") && \
-    export OH_VENV_PATH=$(/openhands/miniforge3/bin/mamba run -n base poetry env info --path) && \
+    echo "OH_INTERPRETER_PATH=$(/openhands/miniforge3/bin/mamba run -n base poetry run python -c "import sys; print(sys.executable)")" >> /etc/environment && \
    # Install extra dependencies if specified
    {{ extra_deps }} {% if extra_deps %} && {% endif %} \
    # Clear caches
@@ -81,16 +80,6 @@ RUN \
    # Clean up
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    /openhands/miniforge3/bin/mamba clean --all
-{% if not skip_init %}
-RUN \
-    # Add the Poetry virtual environment to the bashrc
-    echo "export OH_INTERPRETER_PATH=\"$OH_INTERPRETER_PATH\"" >> /etc/bash.bashrc && \
-    echo "export OH_VENV_PATH=\"$OH_VENV_PATH\"" >> /etc/bash.bashrc && \
-    # Activate the Poetry virtual environment
-    echo 'source "$OH_VENV_PATH/bin/activate"' >> /etc/bash.bashrc && \
-    # Use the Poetry virtual environment's Python interpreter
-    echo 'alias python="$OH_INTERPRETER_PATH"' >> /etc/bash.bashrc
-{% endif %}
 # ================================================================
 # END: Copy Project and Install/Update Dependencies
 # ================================================================
--- a/openhands/security/analyzer.py
+++ b/openhands/security/analyzer.py
@@ -24,7 +24,7 @@ class SecurityAnalyzer:

    async def on_event(self, event: Event) -> None:
        """Handles the incoming event, and when Action is received, analyzes it for security risks."""
-        logger.debug(f'SecurityAnalyzer received event: {event}')
+        logger.info(f'SecurityAnalyzer received event: {event}')
        await self.log_event(event)
        if not isinstance(event, Action):
            return
--- a/openhands/security/invariant/analyzer.py
+++ b/openhands/security/invariant/analyzer.py
@@ -150,7 +150,7 @@ class InvariantAnalyzer(SecurityAnalyzer):
            self.event_stream.add_event(new_event, EventSource.AGENT)

    async def security_risk(self, event: Action) -> ActionSecurityRisk:
-        logger.debug('Calling security_risk on InvariantAnalyzer')
+        logger.info('Calling security_risk on InvariantAnalyzer')
        new_elements = parse_element(self.trace, event)
        input = [e.model_dump(exclude_none=True) for e in new_elements]  # type: ignore [call-overload]
        self.trace.extend(new_elements)
--- a/openhands/server/auth/auth.py
+++ b/openhands/server/auth/auth.py
@@ -26,7 +26,7 @@ def get_sid_from_token(token: str, jwt_secret: str) -> str:
    except InvalidTokenError:
        logger.error('Invalid token')
    except Exception as e:
-        logger.exception(f'Unexpected error decoding token: {e}')
+        logger.exception('Unexpected error decoding token: %s', e)
    return ''


--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -1,4 +1,5 @@
 import asyncio
+from threading import Thread
 from typing import Callable, Optional

 from openhands.controller import AgentController
@@ -6,6 +7,9 @@ from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig, AppConfig, LLMConfig
 from openhands.core.logger import openhands_logger as logger
+from openhands.core.schema.agent import AgentState
+from openhands.events.action.agent import ChangeAgentStateAction
+from openhands.events.event import EventSource
 from openhands.events.stream import EventStream
 from openhands.runtime import get_runtime_cls
 from openhands.runtime.runtime import Runtime
@@ -27,6 +31,7 @@ class AgentSession:
    runtime: Runtime | None = None
    security_analyzer: SecurityAnalyzer | None = None
    _closed: bool = False
+    loop: asyncio.AbstractEventLoop

    def __init__(self, sid: str, file_store: FileStore):
        """Initializes a new instance of the Session class
@@ -39,6 +44,7 @@ class AgentSession:
        self.sid = sid
        self.event_stream = EventStream(sid, file_store)
        self.file_store = file_store
+        self.loop = asyncio.new_event_loop()

    async def start(
        self,
@@ -65,9 +71,36 @@ class AgentSession:
            raise RuntimeError(
                'Session already started. You need to close this session and start a new one.'
            )
-        await self._create_security_analyzer(config.security.security_analyzer)
-        await self._create_runtime(runtime_name, config, agent, status_message_callback)
-        await self._create_controller(
+
+        self.thread = Thread(target=self._run, daemon=True)
+        self.thread.start()
+
+        coro = self._start(
+            runtime_name,
+            config,
+            agent,
+            max_iterations,
+            max_budget_per_task,
+            agent_to_llm_config,
+            agent_configs,
+            status_message_callback,
+        )
+        asyncio.run_coroutine_threadsafe(coro, self.loop)  # type: ignore
+
+    async def _start(
+        self,
+        runtime_name: str,
+        config: AppConfig,
+        agent: Agent,
+        max_iterations: int,
+        max_budget_per_task: float | None = None,
+        agent_to_llm_config: dict[str, LLMConfig] | None = None,
+        agent_configs: dict[str, AgentConfig] | None = None,
+        status_message_callback: Optional[Callable] = None,
+    ):
+        self._create_security_analyzer(config.security.security_analyzer)
+        self._create_runtime(runtime_name, config, agent, status_message_callback)
+        self._create_controller(
            agent,
            config.security.confirmation_mode,
            max_iterations,
@@ -75,6 +108,16 @@ class AgentSession:
            agent_to_llm_config=agent_to_llm_config,
            agent_configs=agent_configs,
        )
+        self.event_stream.add_event(
+            ChangeAgentStateAction(AgentState.INIT), EventSource.USER
+        )
+        if self.controller:
+            self.controller.agent_task = self.controller.start_step_loop()
+            await self.controller.agent_task  # type: ignore
+
+    def _run(self):
+        asyncio.set_event_loop(self.loop)
+        self.loop.run_forever()

    async def close(self):
        """Closes the Agent session"""
@@ -89,9 +132,13 @@ class AgentSession:
            self.runtime.close()
        if self.security_analyzer is not None:
            await self.security_analyzer.close()
+
+        self.loop.call_soon_threadsafe(self.loop.stop)
+        self.thread.join()
+
        self._closed = True

-    async def _create_security_analyzer(self, security_analyzer: str | None):
+    def _create_security_analyzer(self, security_analyzer: str | None):
        """Creates a SecurityAnalyzer instance that will be used to analyze the agent actions

        Parameters:
@@ -104,7 +151,7 @@ class AgentSession:
                security_analyzer, SecurityAnalyzer
            )(self.event_stream)

-    async def _create_runtime(
+    def _create_runtime(
        self,
        runtime_name: str,
        config: AppConfig,
@@ -125,8 +172,7 @@ class AgentSession:
        logger.info(f'Initializing runtime `{runtime_name}` now...')
        runtime_cls = get_runtime_cls(runtime_name)

-        self.runtime = await asyncio.to_thread(
-            runtime_cls,
+        self.runtime = runtime_cls(
            config=config,
            event_stream=self.event_stream,
            sid=self.sid,
@@ -141,7 +187,7 @@ class AgentSession:
        else:
            logger.warning('Runtime initialization failed')

-    async def _create_controller(
+    def _create_controller(
        self,
        agent: Agent,
        confirmation_mode: bool,
@@ -196,5 +242,5 @@ class AgentSession:
            )
            logger.info(f'Restored agent state from session, sid: {self.sid}')
        except Exception as e:
-            logger.debug(f'State could not be restored: {e}')
+            logger.info(f'State could not be restored: {e}')
        logger.info('Agent controller initialized.')
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -65,10 +65,10 @@ class Session:
                await self.dispatch(data)
        except WebSocketDisconnect:
            await self.close()
-            logger.info(f'WebSocket disconnected, sid: {self.sid}')
+            logger.info('WebSocket disconnected, sid: %s', self.sid)
        except RuntimeError as e:
            await self.close()
-            logger.exception(f'Error in loop_recv: {e}')
+            logger.exception('Error in loop_recv: %s', e)

    async def _initialize_agent(self, data: dict):
        self.agent_session.event_stream.add_event(
@@ -123,9 +123,6 @@ class Session:
                f'Error creating controller. Please check Docker is running and visit `{TROUBLESHOOTING_URL}` for more debugging information..'
            )
            return
-        self.agent_session.event_stream.add_event(
-            ChangeAgentStateAction(AgentState.INIT), EventSource.USER
-        )

    async def on_event(self, event: Event):
        """Callback function for events that mainly come from the agent.
@@ -165,6 +162,9 @@ class Session:
                        'Model does not support image upload, change to a different model or try without an image.'
                    )
                    return
+        asyncio.run_coroutine_threadsafe(self._add_event(event, EventSource.USER), self.agent_session.loop) # type: ignore
+
+    async def _add_event(self, event, event_source):  # coroutine wrapper so on_event can schedule it on agent_session.loop
-        self.agent_session.event_stream.add_event(event, EventSource.USER)
+        self.agent_session.event_stream.add_event(event, event_source)  # honor the caller-supplied source instead of hard-coding USER

    async def send(self, data: dict[str, object]) -> bool:
@@ -190,6 +190,10 @@ class Session:
        """Sends a message to the client."""
        return await self.send({'message': message})

+    async def send_status_message(self, message: str) -> bool:
+        """Sends a status message to the client as a ``{'status': message}`` payload and returns the result of ``send``."""
+        return await self.send({'status': message})
+
    def update_connection(self, ws: WebSocket):
        self.websocket = ws
        self.is_alive = True
@@ -205,4 +209,4 @@ class Session:
    def queue_status_message(self, message: str):
        """Queues a status message to be sent asynchronously."""
        # Ensure the coroutine runs in the main event loop
-        asyncio.run_coroutine_threadsafe(self.send_message(message), self.loop)
+        asyncio.run_coroutine_threadsafe(self.send_status_message(message), self.loop)
--- a/poetry.lock
+++ b/poetry.lock
@@ -571,17 +571,17 @@ files = [

 [[package]]
 name = "boto3"
-version = "1.35.25"
+version = "1.35.29"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "boto3-1.35.25-py3-none-any.whl", hash = "sha256:b1cfad301184cdd44dfd4805187ccab12de8dd28dd12a11a5cfdace17918c6de"},
-    {file = "boto3-1.35.25.tar.gz", hash = "sha256:5df4e2cbe3409db07d3a0d8d63d5220ce3202a78206ad87afdbb41519b26ce45"},
+    {file = "boto3-1.35.29-py3-none-any.whl", hash = "sha256:2244044cdfa8ac345d7400536dc15a4824835e7ec5c55bc267e118af66bb27db"},
+    {file = "boto3-1.35.29.tar.gz", hash = "sha256:7bbb1ee649e09e956952285782cfdebd7e81fc78384f48dfab3d66c6eaf3f63f"},
 ]

 [package.dependencies]
-botocore = ">=1.35.25,<1.36.0"
+botocore = ">=1.35.29,<1.36.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.10.0,<0.11.0"

@@ -590,13 +590,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.35.25"
+version = "1.35.29"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "botocore-1.35.25-py3-none-any.whl", hash = "sha256:e58d60260abf10ccc4417967923117c9902a6a0cff9fddb6ea7ff42dc1bd4630"},
-    {file = "botocore-1.35.25.tar.gz", hash = "sha256:76c5706b2c6533000603ae8683a297c887abbbaf6ee31e1b2e2863b74b2989bc"},
+    {file = "botocore-1.35.29-py3-none-any.whl", hash = "sha256:f8e3ae0d84214eff3fb69cb4dc51cea6c43d3bde82027a94d00c52b941d6c3d5"},
+    {file = "botocore-1.35.29.tar.gz", hash = "sha256:4ed28ab03675bb008a290c452c5ddd7aaa5d4e3fa1912aadbdf93057ee84362b"},
 ]

 [package.dependencies]
@@ -609,32 +609,32 @@ crt = ["awscrt (==0.21.5)"]

 [[package]]
 name = "browsergym"
-version = "0.7.0"
+version = "0.7.1"
 description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
 optional = false
 python-versions = ">3.7"
 files = [
-    {file = "browsergym-0.7.0-py3-none-any.whl", hash = "sha256:e2b98d2990ec1bfd80fd3e8034e60a60f363a5240be794e0ace975f24601d1a8"},
-    {file = "browsergym-0.7.0.tar.gz", hash = "sha256:e1cd9812b32a9387bac42b726bf7669c35a46b5fe6d1faf939333f095d5a6ba5"},
+    {file = "browsergym-0.7.1-py3-none-any.whl", hash = "sha256:af216abf3e1ad538e4d31e5bf96da03768ac4aabc9a566159355fa2b6af093da"},
+    {file = "browsergym-0.7.1.tar.gz", hash = "sha256:c269eb8b6da4bd186c05529f3492a9bef2210a89e2cdae4b7557b6ae7091c28e"},
 ]

 [package.dependencies]
-browsergym-core = "0.7.0"
-browsergym-experiments = "0.7.0"
-browsergym-miniwob = "0.7.0"
-browsergym-visualwebarena = "0.7.0"
-browsergym-webarena = "0.7.0"
+browsergym-core = "0.7.1"
+browsergym-experiments = "0.7.1"
+browsergym-miniwob = "0.7.1"
+browsergym-visualwebarena = "0.7.1"
+browsergym-webarena = "0.7.1"
 browsergym-workarena = "*"

 [[package]]
 name = "browsergym-core"
-version = "0.7.0"
+version = "0.7.1"
 description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
 optional = false
 python-versions = ">3.9"
 files = [
-    {file = "browsergym_core-0.7.0-py3-none-any.whl", hash = "sha256:4f4c7a153daa984701f76e81eaa358b4a9684e8f3fb4dcd80c807e7ed8112914"},
-    {file = "browsergym_core-0.7.0.tar.gz", hash = "sha256:069987057dcdea2c25b1b631691f93d77c2d042108079c16874128dcc459d809"},
+    {file = "browsergym_core-0.7.1-py3-none-any.whl", hash = "sha256:28a79537e91fd0dff639fbed9d1f3318b99f8aa5efe054f2468fc0bf2d220ba6"},
+    {file = "browsergym_core-0.7.1.tar.gz", hash = "sha256:da6bdd190a8ccdc8394e68a2a17701b7af3208e0267ca7ed9fd33dc4c2c7ea99"},
 ]

 [package.dependencies]
@@ -643,67 +643,67 @@ gymnasium = ">=0.27"
 lxml = ">=4.9"
 numpy = ">=1.14"
 pillow = ">=10.1"
-playwright = ">=1.32,<1.40"
+playwright = ">=1.39,<2.0"
 pyparsing = ">=3"

 [[package]]
 name = "browsergym-experiments"
-version = "0.7.0"
+version = "0.7.1"
 description = "Experimentation tools for BrowserGym"
 optional = false
 python-versions = ">3.7"
 files = [
-    {file = "browsergym_experiments-0.7.0-py3-none-any.whl", hash = "sha256:c10f810eb631622804ebbf5e5783636cf8aff2a53ea0e38bfcfb129273865b1b"},
-    {file = "browsergym_experiments-0.7.0.tar.gz", hash = "sha256:9ee937720d2b84563851a2ae2c94c685da299fbadd957ba743ef7f1351fd0e23"},
+    {file = "browsergym_experiments-0.7.1-py3-none-any.whl", hash = "sha256:0f3104da708436fe93460cd609590d28aa9dcbeda68d8a51599a56daaea7cd96"},
+    {file = "browsergym_experiments-0.7.1.tar.gz", hash = "sha256:75f9e5676e625cb7ec4a5fbce8d6832708b01cb4e5af9c150cfd27182af60a74"},
 ]

 [package.dependencies]
-browsergym-core = "0.7.0"
+browsergym-core = "0.7.1"
 tiktoken = ">=0.4"

 [[package]]
 name = "browsergym-miniwob"
-version = "0.7.0"
+version = "0.7.1"
 description = "MiniWoB++ benchmark for BrowserGym"
 optional = false
 python-versions = ">3.7"
 files = [
-    {file = "browsergym_miniwob-0.7.0-py3-none-any.whl", hash = "sha256:9223400aa737dcbca79884a6174b67635ec5b913f490232b60e5391fc34eecb4"},
-    {file = "browsergym_miniwob-0.7.0.tar.gz", hash = "sha256:b4d248541a86f9dc21c9fc5a03699ef16dfd96a97d9347d3c6ef4ae9145f691f"},
+    {file = "browsergym_miniwob-0.7.1-py3-none-any.whl", hash = "sha256:69f560b5d0210a5db3b2672d0ac48e274170f765832e3628da3fd0ba694d3f40"},
+    {file = "browsergym_miniwob-0.7.1.tar.gz", hash = "sha256:635909cbe0646985699fc65715c463e258c04dad16521bfa59cb0ae4ec797f8f"},
 ]

 [package.dependencies]
-browsergym-core = "0.7.0"
+browsergym-core = "0.7.1"

 [[package]]
 name = "browsergym-visualwebarena"
-version = "0.7.0"
+version = "0.7.1"
 description = "VisualWebArena benchmark for BrowserGym"
 optional = false
 python-versions = ">3.7"
 files = [
-    {file = "browsergym_visualwebarena-0.7.0-py3-none-any.whl", hash = "sha256:499124dd8a0619905049598428205cad4d3237e6acef80225f3c734f428b16b9"},
-    {file = "browsergym_visualwebarena-0.7.0.tar.gz", hash = "sha256:78fd89a922b94b7de912b6ab44d48845a25283eb7265c526811542f6833edbaa"},
+    {file = "browsergym_visualwebarena-0.7.1-py3-none-any.whl", hash = "sha256:bf9bb0d2f406276531aee10dd04371b152cfa2c703402291e57a04ea47847c43"},
+    {file = "browsergym_visualwebarena-0.7.1.tar.gz", hash = "sha256:b26db1d75a9ecae7d97a1bbefad2d7ea10e49119e4aec8320cf6f8bcd265e45c"},
 ]

 [package.dependencies]
-browsergym-core = "0.7.0"
+browsergym-core = "0.7.1"
 libvisualwebarena = "0.0.8"
 requests = "*"

 [[package]]
 name = "browsergym-webarena"
-version = "0.7.0"
+version = "0.7.1"
 description = "WebArena benchmark for BrowserGym"
 optional = false
 python-versions = ">3.7"
 files = [
-    {file = "browsergym_webarena-0.7.0-py3-none-any.whl", hash = "sha256:d04b2cdadce47ffc9b4d6751f7f5dbd403e561cf4bf2b80801edcbb03bcf8ce6"},
-    {file = "browsergym_webarena-0.7.0.tar.gz", hash = "sha256:f7b0839ca009962457a03c948261fb36fbcbababd60208132ec77f92c6a19a59"},
+    {file = "browsergym_webarena-0.7.1-py3-none-any.whl", hash = "sha256:117cb2946d8a9b3536d0a55300eb28b7650c0e70339855bed8890f0f8fc887e9"},
+    {file = "browsergym_webarena-0.7.1.tar.gz", hash = "sha256:568de29ab0a7a1a569855e1bb71af5cfd4e982156988a64e30ada3bfe309b399"},
 ]

 [package.dependencies]
-browsergym-core = "0.7.0"
+browsergym-core = "0.7.1"
 libwebarena = "0.0.3"

 [[package]]
@@ -1014,13 +1014,13 @@ numpy = "*"

 [[package]]
 name = "chromadb"
-version = "0.5.7"
+version = "0.5.11"
 description = "Chroma."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "chromadb-0.5.7-py3-none-any.whl", hash = "sha256:2358f92804cd198b125de73076ec48f9f55c729df119919a76a6716ad0e465f6"},
-    {file = "chromadb-0.5.7.tar.gz", hash = "sha256:3432865025ef3ceeaee0a59b265a784d8b5978cb7c41593c74ddd2427c776c94"},
+    {file = "chromadb-0.5.11-py3-none-any.whl", hash = "sha256:f02d9326869cea926f980bd6c9a0150a0ef2e151072f325998c16a9502fb4b25"},
+    {file = "chromadb-0.5.11.tar.gz", hash = "sha256:252e970b3e1a27b594cc7b3685238691bf8eaa232225d4dee9e33ec83580775f"},
 ]

 [package.dependencies]
@@ -1351,13 +1351,13 @@ typing-inspect = ">=0.4.0,<1"

 [[package]]
 name = "datasets"
-version = "3.0.0"
+version = "3.0.1"
 description = "HuggingFace community-driven open-source library of datasets"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "datasets-3.0.0-py3-none-any.whl", hash = "sha256:c23fefb6c953dcb1cd5f6deb6c502729c733ef98791e0c3f2d80c7ca2d9a01dd"},
-    {file = "datasets-3.0.0.tar.gz", hash = "sha256:592317eb137f0fc5aac068ff283ba13c3c66d10c9c034d44bc8aa584126cf3e2"},
+    {file = "datasets-3.0.1-py3-none-any.whl", hash = "sha256:db080aab41c8cc68645117a0f172e5c6789cbc672f066de0aa5a08fc3eebc686"},
+    {file = "datasets-3.0.1.tar.gz", hash = "sha256:40d63b09e76a3066c32e746d6fdc36fd3f29ed2acd49bf5b1a2100da32936511"},
 ]

 [package.dependencies]
@@ -1379,15 +1379,15 @@ xxhash = "*"
 [package.extras]
 audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"]
 benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "transformers (>=4.42.0)", "zstandard"]
+dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"]
 docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"]
 jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
 quality = ["ruff (>=0.3.0)"]
 s3 = ["s3fs"]
 tensorflow = ["tensorflow (>=2.6.0)"]
 tensorflow-gpu = ["tensorflow (>=2.6.0)"]
-tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "zstandard"]
-tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "zstandard"]
+tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"]
+tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"]
 torch = ["torch"]
 vision = ["Pillow (>=9.4.0)"]

@@ -2142,13 +2142,13 @@ test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit",

 [[package]]
 name = "google-ai-generativelanguage"
-version = "0.6.9"
+version = "0.6.10"
 description = "Google Ai Generativelanguage API client library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google_ai_generativelanguage-0.6.9-py3-none-any.whl", hash = "sha256:50360cd80015d1a8cc70952e98560f32fa06ddee2e8e9f4b4b98e431dc561e0b"},
-    {file = "google_ai_generativelanguage-0.6.9.tar.gz", hash = "sha256:899f1d3a06efa9739f1cd9d2788070178db33c89d4a76f2e8f4da76f649155fa"},
+    {file = "google_ai_generativelanguage-0.6.10-py3-none-any.whl", hash = "sha256:854a2bf833d18be05ad5ef13c755567b66a4f4a870f099b62c61fe11bddabcf4"},
+    {file = "google_ai_generativelanguage-0.6.10.tar.gz", hash = "sha256:6fa642c964d8728006fe7e8771026fc0b599ae0ebeaf83caf550941e8e693455"},
 ]

 [package.dependencies]
@@ -2240,13 +2240,13 @@ httplib2 = ">=0.19.0"

 [[package]]
 name = "google-cloud-aiplatform"
-version = "1.67.1"
+version = "1.68.0"
 description = "Vertex AI API client library"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "google-cloud-aiplatform-1.67.1.tar.gz", hash = "sha256:701a19061c8c670baa93464ca0b8a1a8720494f802187cef06bc9fcf952db315"},
-    {file = "google_cloud_aiplatform-1.67.1-py2.py3-none-any.whl", hash = "sha256:2ff0e1794839fcf74d644f3f54ff2de5d8099b3e388edecc48f6d620c1f3582c"},
+    {file = "google-cloud-aiplatform-1.68.0.tar.gz", hash = "sha256:d74e9f33707c7a14c6a32a7cfe9acd32b90975dfba9fac487d105c8ba5197f40"},
+    {file = "google_cloud_aiplatform-1.68.0-py2.py3-none-any.whl", hash = "sha256:24dacc34457665ab6054bdf47e2475793dcf2d865b568420a909b452a477b3e6"},
 ]

 [package.dependencies]
@@ -2277,7 +2277,7 @@ pipelines = ["pyyaml (>=5.3.1,<7)"]
 prediction = ["docker (>=5.0.3)", "fastapi (>=0.71.0,<=0.114.0)", "httpx (>=0.23.0,<0.25.0)", "starlette (>=0.17.1)", "uvicorn[standard] (>=0.16.0)"]
 private-endpoints = ["requests (>=2.28.1)", "urllib3 (>=1.21.1,<1.27)"]
 ray = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=6.0.1)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "setuptools (<70.0.0)"]
-ray-testing = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=6.0.1)", "pytest-xdist", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "ray[train] (==2.9.3)", "scikit-learn", "setuptools (<70.0.0)", "tensorflow", "torch (>=2.0.0,<2.1.0)", "xgboost", "xgboost-ray"]
+ray-testing = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=6.0.1)", "pytest-xdist", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "ray[train]", "scikit-learn", "setuptools (<70.0.0)", "tensorflow", "torch (>=2.0.0,<2.1.0)", "xgboost", "xgboost-ray"]
 reasoningengine = ["cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)"]
 tensorboard = ["tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"]
 testing = ["bigframes", "docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-api-core (>=2.11,<3.0.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "grpcio-testing", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "ipython", "kfp (>=2.6.0,<3.0.0)", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "nltk", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pytest-asyncio", "pytest-xdist", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "requests-toolbelt (<1.0.0)", "scikit-learn", "sentencepiece (>=0.2.0)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (==2.13.0)", "tensorflow (==2.16.1)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "torch (>=2.0.0,<2.1.0)", "torch (>=2.2.0)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)", "xgboost"]
@@ -2457,16 +2457,16 @@ testing = ["pytest"]

 [[package]]
 name = "google-generativeai"
-version = "0.8.1"
+version = "0.8.2"
 description = "Google Generative AI High level API client library and tools."
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "google_generativeai-0.8.1-py3-none-any.whl", hash = "sha256:b031877f24d51af0945207657c085896a0a886eceec7a1cb7029327b0aa6e2f6"},
+    {file = "google_generativeai-0.8.2-py3-none-any.whl", hash = "sha256:fabc0e2e8d2bfb6fdb1653e91dba83fecb2a2a6878883b80017def90fda8032d"},
 ]

 [package.dependencies]
-google-ai-generativelanguage = "0.6.9"
+google-ai-generativelanguage = "0.6.10"
 google-api-core = "*"
 google-api-python-client = "*"
 google-auth = ">=2.15.0"
@@ -3234,13 +3234,13 @@ files = [

 [[package]]
 name = "json-repair"
-version = "0.29.4"
+version = "0.29.7"
 description = "A package to repair broken json strings"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "json_repair-0.29.4-py3-none-any.whl", hash = "sha256:2d7addfa01e3b4c295c4ebabd5f393127adae0d345616d3a2517df8260429dae"},
-    {file = "json_repair-0.29.4.tar.gz", hash = "sha256:2921760e707ac0d0b63478402fd6ea3162d4191adf873b396becb31c47a1ac30"},
+    {file = "json_repair-0.29.7-py3-none-any.whl", hash = "sha256:efbc4d541001bda23012a68902d38f28ce1db4981ccb6f9e7371e264f10196c8"},
+    {file = "json_repair-0.29.7.tar.gz", hash = "sha256:d43c3aae2dd743e0ea55a865b8a507b3bd6d5bf54d97701dc56d71b49e45b41a"},
 ]

 [[package]]
@@ -3762,13 +3762,13 @@ types-tqdm = "*"

 [[package]]
 name = "litellm"
-version = "1.48.0"
+version = "1.48.6"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.48.0-py3-none-any.whl", hash = "sha256:7765e8a92069778f5fc66aacfabd0e2f8ec8d74fb117f5e475567d89b0d376b9"},
-    {file = "litellm-1.48.0.tar.gz", hash = "sha256:31a9b8a25a9daf44c24ddc08bf74298da920f2c5cea44135e5061278d0aa6fc9"},
+    {file = "litellm-1.48.6-py3-none-any.whl", hash = "sha256:7f6e0f787790d29c4464123bae92712ceb2dd1e05eef1ea90182663c4e4762a3"},
+    {file = "litellm-1.48.6.tar.gz", hash = "sha256:44584867d115ba0c1bb5f39efbc8a6131642e63d078e6a9cf2e7abe969d5edf6"},
 ]

 [package.dependencies]
@@ -3805,19 +3805,19 @@ pydantic = ">=1.10"

 [[package]]
 name = "llama-index"
-version = "0.11.12"
+version = "0.11.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "llama_index-0.11.12-py3-none-any.whl", hash = "sha256:a7d0b4065df2689cec1baeab9bfaed4d94e4ddc7e941df2ee47abfb218ce3ea1"},
-    {file = "llama_index-0.11.12.tar.gz", hash = "sha256:6b9220bf4c76a4ac0a82ccc642c3ea94f51381a9718ac601021f2fa95b74aab1"},
+    {file = "llama_index-0.11.14-py3-none-any.whl", hash = "sha256:69447a25cb73f910146200e8f45579e0a6e5e390bb2818f229e68fbb625e0a2d"},
+    {file = "llama_index-0.11.14.tar.gz", hash = "sha256:6d18093550bdf92442dc7aa0e4d9fef2616941e3d101409340d47c7a99b9f739"},
 ]

 [package.dependencies]
 llama-index-agent-openai = ">=0.3.4,<0.4.0"
 llama-index-cli = ">=0.3.1,<0.4.0"
-llama-index-core = ">=0.11.11,<0.12.0"
+llama-index-core = ">=0.11.14,<0.12.0"
 llama-index-embeddings-openai = ">=0.2.4,<0.3.0"
 llama-index-indices-managed-llama-cloud = ">=0.3.0"
 llama-index-legacy = ">=0.9.48,<0.10.0"
@@ -3863,13 +3863,13 @@ llama-index-llms-openai = ">=0.2.0,<0.3.0"

 [[package]]
 name = "llama-index-core"
-version = "0.11.12"
+version = "0.11.14"
 description = "Interface between LLMs and your data"
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "llama_index_core-0.11.12-py3-none-any.whl", hash = "sha256:7dc7ead649bac8f09e61c6c8bf93d257f68a7315223552421be4f0ffc3a8054d"},
-    {file = "llama_index_core-0.11.12.tar.gz", hash = "sha256:ce2dd037ff889d9ea6b25872228cc9de614c10445d19377f6ae5c66b93a50c61"},
+    {file = "llama_index_core-0.11.14-py3-none-any.whl", hash = "sha256:e63e5b1f4daa56952a7846cbbf0265b1288909efaea866216a4c6fb65daa2923"},
+    {file = "llama_index_core-0.11.14.tar.gz", hash = "sha256:6ff7be9f5bbb04be0d8064f76510edf79f8a9833ebae28b46261b274556827ca"},
 ]

 [package.dependencies]
@@ -5365,13 +5365,13 @@ sympy = "*"

 [[package]]
 name = "openai"
-version = "1.47.1"
+version = "1.50.2"
 description = "The official Python library for the openai API"
 optional = false
 python-versions = ">=3.7.1"
 files = [
-    {file = "openai-1.47.1-py3-none-any.whl", hash = "sha256:34277583bf268bb2494bc03f48ac123788c5e2a914db1d5a23d5edc29d35c825"},
-    {file = "openai-1.47.1.tar.gz", hash = "sha256:62c8f5f478f82ffafc93b33040f8bb16a45948306198bd0cba2da2ecd9cf7323"},
+    {file = "openai-1.50.2-py3-none-any.whl", hash = "sha256:822dd2051baa3393d0d5406990611975dd6f533020dc9375a34d4fe67e8b75f7"},
+    {file = "openai-1.50.2.tar.gz", hash = "sha256:3987ae027152fc8bea745d60b02c8f4c4a76e1b5c70e73565fa556db6f78c9e6"},
 ]

 [package.dependencies]
@@ -6757,18 +6757,15 @@ files = [

 [[package]]
 name = "python-multipart"
-version = "0.0.9"
+version = "0.0.12"
 description = "A streaming multipart parser for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "python_multipart-0.0.9-py3-none-any.whl", hash = "sha256:97ca7b8ea7b05f977dc3849c3ba99d51689822fab725c3703af7c866a0c2b215"},
-    {file = "python_multipart-0.0.9.tar.gz", hash = "sha256:03f54688c663f1b7977105f021043b0793151e4cb1c1a9d4a11fc13d622c4026"},
+    {file = "python_multipart-0.0.12-py3-none-any.whl", hash = "sha256:43dcf96cf65888a9cd3423544dd0d75ac10f7aa0c3c28a175bbcd00c9ce1aebf"},
+    {file = "python_multipart-0.0.12.tar.gz", hash = "sha256:045e1f98d719c1ce085ed7f7e1ef9d8ccc8c02ba02b5566d5f7521410ced58cb"},
 ]

-[package.extras]
-dev = ["atomicwrites (==1.4.1)", "attrs (==23.2.0)", "coverage (==7.4.1)", "hatch", "invoke (==2.2.0)", "more-itertools (==10.2.0)", "pbr (==6.0.0)", "pluggy (==1.4.0)", "py (==1.11.0)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.2.0)", "pyyaml (==6.0.1)", "ruff (==0.2.1)"]
-
 [[package]]
 name = "python-pptx"
 version = "1.0.2"
@@ -7123,13 +7120,13 @@ files = [

 [[package]]
 name = "reportlab"
-version = "4.2.2"
+version = "4.2.4"
 description = "The Reportlab Toolkit"
 optional = false
 python-versions = "<4,>=3.7"
 files = [
-    {file = "reportlab-4.2.2-py3-none-any.whl", hash = "sha256:927616931637e2f13e2ee3b3b6316d7a07803170e258621cff7d138bde17fbb5"},
-    {file = "reportlab-4.2.2.tar.gz", hash = "sha256:765eecbdd68491c56947e29c38b8b69b834ee5dbbdd2fb7409f08ebdebf04428"},
+    {file = "reportlab-4.2.4-py3-none-any.whl", hash = "sha256:6e4d86647b8bfd772f475a58f9b0dcba4b340b1969f0db36333089f6ca9ab362"},
+    {file = "reportlab-4.2.4.tar.gz", hash = "sha256:a00b57292e156a7bda84edf31d60c25578153076c8fb96331d0c59eddda052c8"},
 ]

 [package.dependencies]
@@ -7367,29 +7364,29 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.6.7"
+version = "0.6.8"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.6.7-py3-none-linux_armv6l.whl", hash = "sha256:08277b217534bfdcc2e1377f7f933e1c7957453e8a79764d004e44c40db923f2"},
-    {file = "ruff-0.6.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c6707a32e03b791f4448dc0dce24b636cbcdee4dd5607adc24e5ee73fd86c00a"},
-    {file = "ruff-0.6.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:533d66b7774ef224e7cf91506a7dafcc9e8ec7c059263ec46629e54e7b1f90ab"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17a86aac6f915932d259f7bec79173e356165518859f94649d8c50b81ff087e9"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3f8822defd260ae2460ea3832b24d37d203c3577f48b055590a426a722d50ef"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ba4efe5c6dbbb58be58dd83feedb83b5e95c00091bf09987b4baf510fee5c99"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:525201b77f94d2b54868f0cbe5edc018e64c22563da6c5c2e5c107a4e85c1c0d"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8854450839f339e1049fdbe15d875384242b8e85d5c6947bb2faad33c651020b"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f0b62056246234d59cbf2ea66e84812dc9ec4540518e37553513392c171cb18"},
-    {file = "ruff-0.6.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b1462fa56c832dc0cea5b4041cfc9c97813505d11cce74ebc6d1aae068de36b"},
-    {file = "ruff-0.6.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:02b083770e4cdb1495ed313f5694c62808e71764ec6ee5db84eedd82fd32d8f5"},
-    {file = "ruff-0.6.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c05fd37013de36dfa883a3854fae57b3113aaa8abf5dea79202675991d48624"},
-    {file = "ruff-0.6.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f49c9caa28d9bbfac4a637ae10327b3db00f47d038f3fbb2195c4d682e925b14"},
-    {file = "ruff-0.6.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0e1655868164e114ba43a908fd2d64a271a23660195017c17691fb6355d59bb"},
-    {file = "ruff-0.6.7-py3-none-win32.whl", hash = "sha256:a939ca435b49f6966a7dd64b765c9df16f1faed0ca3b6f16acdf7731969deb35"},
-    {file = "ruff-0.6.7-py3-none-win_amd64.whl", hash = "sha256:590445eec5653f36248584579c06252ad2e110a5d1f32db5420de35fb0e1c977"},
-    {file = "ruff-0.6.7-py3-none-win_arm64.whl", hash = "sha256:b28f0d5e2f771c1fe3c7a45d3f53916fc74a480698c4b5731f0bea61e52137c8"},
-    {file = "ruff-0.6.7.tar.gz", hash = "sha256:44e52129d82266fa59b587e2cd74def5637b730a69c4542525dfdecfaae38bd5"},
+    {file = "ruff-0.6.8-py3-none-linux_armv6l.whl", hash = "sha256:77944bca110ff0a43b768f05a529fecd0706aac7bcce36d7f1eeb4cbfca5f0f2"},
+    {file = "ruff-0.6.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27b87e1801e786cd6ede4ada3faa5e254ce774de835e6723fd94551464c56b8c"},
+    {file = "ruff-0.6.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cd48f945da2a6334f1793d7f701725a76ba93bf3d73c36f6b21fb04d5338dcf5"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:677e03c00f37c66cea033274295a983c7c546edea5043d0c798833adf4cf4c6f"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9f1476236b3eacfacfc0f66aa9e6cd39f2a624cb73ea99189556015f27c0bdeb"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f5a2f17c7d32991169195d52a04c95b256378bbf0de8cb98478351eb70d526f"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5fd0d4b7b1457c49e435ee1e437900ced9b35cb8dc5178921dfb7d98d65a08d0"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8034b19b993e9601f2ddf2c517451e17a6ab5cdb1c13fdff50c1442a7171d87"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6cfb227b932ba8ef6e56c9f875d987973cd5e35bc5d05f5abf045af78ad8e098"},
+    {file = "ruff-0.6.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef0411eccfc3909269fed47c61ffebdcb84a04504bafa6b6df9b85c27e813b0"},
+    {file = "ruff-0.6.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:007dee844738c3d2e6c24ab5bc7d43c99ba3e1943bd2d95d598582e9c1b27750"},
+    {file = "ruff-0.6.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ce60058d3cdd8490e5e5471ef086b3f1e90ab872b548814e35930e21d848c9ce"},
+    {file = "ruff-0.6.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1085c455d1b3fdb8021ad534379c60353b81ba079712bce7a900e834859182fa"},
+    {file = "ruff-0.6.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:70edf6a93b19481affd287d696d9e311388d808671bc209fb8907b46a8c3af44"},
+    {file = "ruff-0.6.8-py3-none-win32.whl", hash = "sha256:792213f7be25316f9b46b854df80a77e0da87ec66691e8f012f887b4a671ab5a"},
+    {file = "ruff-0.6.8-py3-none-win_amd64.whl", hash = "sha256:ec0517dc0f37cad14a5319ba7bba6e7e339d03fbf967a6d69b0907d61be7a263"},
+    {file = "ruff-0.6.8-py3-none-win_arm64.whl", hash = "sha256:8d3bb2e3fbb9875172119021a13eed38849e762499e3cfde9588e4b4d70968dc"},
+    {file = "ruff-0.6.8.tar.gz", hash = "sha256:a5bf44b1aa0adaf6d9d20f86162b34f7c593bfedabc51239953e446aefc8ce18"},
 ]

 [[package]]
@@ -9688,4 +9685,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "90636ce436e5c05146a69730f461f46fd3185b595be37d3eafd8aef36667db81"
+content-hash = "78e09d0b5c33f39ec951659658b5b4b46ba206d8f95e9a154be4e0ef869b7c79"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openhands-ai"
-version = "0.9.4"
+version = "0.9.7"
 description = "OpenHands: Code Less, Make More"
 authors = ["OpenHands"]
 license = "MIT"
@@ -27,7 +27,7 @@ uvicorn = "*"
 types-toml = "*"
 numpy = "*"
 json-repair = "*"
-browsergym = "0.7.0" # integrate browsergym as the browsing interface
+browsergym = "0.7.1" # integrate browsergym as the browsing interface
 html2text = "*"
 e2b = "^0.17.1"
 pexpect = "*"
@@ -65,7 +65,7 @@ llama-index-embeddings-azure-openai = "*"
 llama-index-embeddings-ollama = "*"

 [tool.poetry.group.dev.dependencies]
-ruff = "0.6.7"
+ruff = "0.6.8"
 mypy = "1.11.2"
 pre-commit = "3.8.0"
 build = "*"
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -11,7 +11,7 @@ from http.server import HTTPServer, SimpleHTTPRequestHandler
 import pytest
 from litellm import completion

-from openhands.llm.llm import message_separator
+from openhands.llm.debug_mixin import MESSAGE_SEPARATOR

 script_dir = os.environ.get('SCRIPT_DIR')
 project_root = os.environ.get('PROJECT_ROOT')
@@ -81,19 +81,19 @@ def _format_messages(messages):
    message_str = ''
    for message in messages:
        if isinstance(message, str):
-            message_str += message_separator + message if message_str else message
+            message_str += MESSAGE_SEPARATOR + message if message_str else message
        elif isinstance(message, dict):
            if isinstance(message['content'], list):
                for m in message['content']:
                    if isinstance(m, str):
-                        message_str += message_separator + m if message_str else m
+                        message_str += MESSAGE_SEPARATOR + m if message_str else m
                    elif isinstance(m, dict) and m['type'] == 'text':
                        message_str += (
-                            message_separator + m['text'] if message_str else m['text']
+                            MESSAGE_SEPARATOR + m['text'] if message_str else m['text']
                        )
            elif isinstance(message['content'], str):
                message_str += (
-                    message_separator + message['content']
+                    MESSAGE_SEPARATOR + message['content']
                    if message_str
                    else message['content']
                )
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_browse_internet/prompt_001.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_browse_internet/prompt_001.log
@@ -117,7 +117,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_browse_internet/prompt_005.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_browse_internet/prompt_005.log
@@ -117,7 +117,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_001.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_001.log
@@ -121,7 +121,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_002.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_002.log
@@ -121,7 +121,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_003.log
@@ -121,7 +121,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_004.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_004.log
@@ -121,7 +121,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_005.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_edits/prompt_005.log
@@ -121,7 +121,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_001.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_001.log
@@ -117,7 +117,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_002.log
+++ b/tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_002.log
@@ -117,7 +117,6 @@ append_file(file_name: str, content: str) -> None:
    It appends text `content` to the end of the specified file, ideal after a `create_file`!
    Args:
    file_name: str: The name of the file to edit.
-    line_number: int: The line number (starting from 1) to insert the content after.
    content: str: The content to insert.

 search_dir(search_term: str, dir_path: str = './') -> None:
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
openhands	e1a848e7cd	Fix issue #3325 : '[Documentation]: config.toml options should be documented on the doc web site'	2024-10-01 14:24:18 +00:00
Xingyao Wang	1109637efb	Update instruction for new version of eval runtime-api (#4128 )	2024-09-30 23:48:38 +00:00
mamoodi	71adfeebab	Update PR Template for better release notes (#4126 )	2024-09-30 17:06:56 -04:00
Robert Brennan	8059e8e298	make runtime url configurable (#4093 )	2024-09-30 18:59:57 +00:00
Xingyao Wang	54ac340e0b	refactor: standardize linter output data structure and interface (#4077 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2024-10-01 02:40:23 +08:00
dependabot[bot]	13901b4b5a	chore(deps): bump python-multipart from 0.0.9 to 0.0.12 (#4121 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-30 20:30:55 +02:00
dependabot[bot]	0b27d51135	chore(deps): bump litellm from 1.48.5 to 1.48.6 (#4120 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-30 20:30:09 +02:00
dependabot[bot]	f0ce682fa0	chore(deps): bump json-repair from 0.29.5 to 0.29.7 (#4115 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-30 20:28:32 +02:00
dependabot[bot]	3567911da8	chore(deps): bump boto3 from 1.35.28 to 1.35.29 (#4122 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-30 20:27:58 +02:00
Graham Neubig	215d227e5a	Reference the OpenHands resolver (#4125 )	2024-09-30 14:26:12 -04:00
mamoodi	50e6cc6156	Release 0.9.7 (#4123 )	2024-09-30 11:28:16 -04:00
Xingyao Wang	8d6eda3623	fix eval_infer.sh to correctly copy SWE-Bench logs (#4111 )	2024-09-29 18:39:18 -05:00
Cole Murray	d5f965b474	Update LiteLLLM to 1.48.5 (#4110 )	2024-09-29 06:42:59 +00:00
tobitege	c3bbe604eb	(fix) Fix logging in shared eval file to prevent key disclosure (#4108 )	2024-09-28 19:33:16 +00:00
Ana Noemi	c7fe39998c	Update README to decrease unsuccessful drivebys (#4091 )	2024-09-28 18:52:01 +00:00
Xingyao Wang	ec6e07647f	fix hash equivalance verification ci for fork (#4107 )	2024-09-29 02:19:59 +08:00
Graham Neubig	e744eadb8b	Robustify openhands resolver workflow (#4105 )	2024-09-28 11:35:56 -04:00
Engel Nyst	e582806004	Vision and prompt caching fixes (#4014 )	2024-09-28 14:37:29 +02:00
OpenHands	f427f9d8d4	Fix issue #4103 : Improve description of how to do frontend setup and testing in `.openhands_instructions` (#4104 ) Co-authored-by: Graham Neubig <neubig@gmail.com>	2024-09-28 06:41:34 +00:00
Graham Neubig	d669c7b60d	Add github issue resolution workflow (#4102 )	2024-09-28 04:52:52 +00:00
dependabot[bot]	42be4ee5bc	chore(deps-dev): bump openai from 1.48.0 to 1.50.2 (#4101 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-28 05:04:03 +02:00
Engel Nyst	f994277d0f	Make agents follow configured temperature (#4099 )	2024-09-28 01:15:46 +00:00
tofarr	5ccee7c8a7	Fix Bash commands now do not block and actually respect the timeout (#4058 )	2024-09-28 08:40:00 +08:00
tobitege	575a829d94	(enh) add test_python_version to test_bash.py runtime tests (#4098 )	2024-09-28 08:21:14 +08:00
Xingyao Wang	2bed3a424c	chore: pass logger DEBUG mode to client side (#4096 )	2024-09-28 08:21:04 +08:00
Xingyao Wang	a4cc010110	chore: parser fix for deepseek (#4097 )	2024-09-28 08:20:51 +08:00
tobitege	9651368e6a	revert #3871 dockerfile template: don't write to .bashrc file (#4095 )	2024-09-27 21:49:51 +00:00
tofarr	c5025fb66e	Fix Reducing the amount being downloaded every time the hash changes. (#4078 )	2024-09-27 15:48:33 -06:00
Robert Brennan	3f9111c615	add idle time to client server (#4084 )	2024-09-27 19:41:16 +00:00
dependabot[bot]	89e95f2671	chore(deps): bump boto3 from 1.35.27 to 1.35.28 (#4090 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 16:48:34 +00:00
dependabot[bot]	5bfa0c2f8d	chore(deps): bump browsergym from 0.7.0 to 0.7.1 (#4089 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 16:21:58 +00:00
dependabot[bot]	84141f656d	chore(deps-dev): bump chromadb from 0.5.9 to 0.5.11 (#4088 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 16:19:17 +00:00
dependabot[bot]	6ff7506581	chore(deps-dev): bump reportlab from 4.2.2 to 4.2.4 (#4086 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 16:01:32 +00:00
dependabot[bot]	41dc7f0256	chore(deps-dev): bump llama-index from 0.11.13 to 0.11.14 (#4085 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 15:50:17 +00:00
Xingyao Wang	34f3b61536	[runtime hash] fix runtime hash mismatch between inside `app` image and in "development mode" (#4039 )	2024-09-27 15:26:26 +00:00
dependabot[bot]	4533c47595	chore(deps-dev): bump @types/node from 22.7.2 to 22.7.3 in /frontend (#4081 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-27 15:20:11 +00:00
Xingyao Wang	47774e60b0	chore: remove deprecated dockerfile (#4079 )	2024-09-27 15:03:23 +00:00
Robert Brennan	b78f646b65	Release 0.9.6 (#4076 )	2024-09-26 21:27:17 +00:00
Amir	3e5c01dfc8	Remove param from docstring that does not exist in the append_file (#4060 )	2024-09-26 22:25:11 +02:00
tobitege	29c34e0b6a	(fix) actions.ts: restored handleAssistantMessage handling order (#4074 )	2024-09-26 19:56:12 +00:00
tofarr	c919086e25	Fix for regression (#4075 ) Regression fixed	2024-09-26 12:58:00 -06:00
Engel Nyst	0a03c802f5	Refactor llm.py (#4057 )	2024-09-26 17:44:18 +00:00
Xingyao Wang	081ebdbdd8	[runtime] do not keep rebuilding from generic image (#4072 )	2024-09-26 17:19:46 +00:00
dependabot[bot]	572c7b726d	chore(deps-dev): bump ruff from 0.6.7 to 0.6.8 (#4067 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 17:19:16 +00:00
Xingyao Wang	cfc5bb70c1	Update README.md for CodeAct (#4070 )	2024-09-26 16:55:08 +00:00
dependabot[bot]	008b866a38	chore(deps-dev): bump jsdom from 25.0.0 to 25.0.1 in /frontend (#3992 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 16:34:29 +00:00
dependabot[bot]	676ad3e140	chore(deps-dev): bump chromadb from 0.5.7 to 0.5.9 (#4069 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 16:08:17 +00:00
dependabot[bot]	19278de5d0	chore(deps): bump json-repair from 0.29.4 to 0.29.5 (#4068 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 15:48:39 +00:00
dependabot[bot]	891e4a8d34	chore(deps): bump datasets from 3.0.0 to 3.0.1 (#4065 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 15:35:36 +00:00
dependabot[bot]	85be8607e0	chore(deps): bump litellm from 1.48.1 to 1.48.2 (#4066 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 23:33:24 +08:00
dependabot[bot]	49b244610c	chore(deps-dev): bump openai from 1.47.1 to 1.48.0 (#4063 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 17:16:59 +02:00
dependabot[bot]	b347b1d06f	chore(deps): bump boto3 from 1.35.26 to 1.35.27 (#4064 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 17:15:11 +02:00
dependabot[bot]	0c86a60b35	chore(deps-dev): bump @types/node from 22.7.0 to 22.7.2 in /frontend (#4062 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 19:11:06 +04:00
tofarr	01317138e2	Fix: uvicorn reloading when python files in workspace change, & started section for debugging instructions for developers (#4041 ) Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>	2024-09-26 08:57:37 -06:00
Xingyao Wang	e03855cd7f	Make sure we print the observation in the same way as the LLM sees it	2024-09-26 14:01:48 +00:00
jaki300	757c9593f1	Create gke-example.md (#3795 ) Co-authored-by: Robert Brennan <accounts@rbren.io>	2024-09-26 09:11:33 -04:00
mamoodi	266e8ff951	Release 0.9.5 (#4061 )	2024-09-26 08:36:31 -04:00
dependabot[bot]	3e79cd12a6	chore(deps-dev): bump @types/react from 18.3.8 to 18.3.9 in /frontend (#4029 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 14:41:21 +04:00
tobitege	2cc1c3ef42	(enh) Docker runtime builder with BuildKit support, enh. caching (#4009 )	2024-09-26 08:50:53 +02:00
dependabot[bot]	ef0b08a46e	chore(deps-dev): bump tailwindcss from 3.4.12 to 3.4.13 in /frontend (#4030 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-26 05:28:03 +00:00
dependabot[bot]	f1d5202884	chore(deps): bump vite from 5.4.7 to 5.4.8 in /frontend (#4046 )	2024-09-26 12:56:35 +08:00
dependabot[bot]	11cedfb854	chore(deps): bump google-cloud-aiplatform from 1.67.1 to 1.68.0 (#4051 )	2024-09-26 12:56:16 +08:00
dependabot[bot]	6d103a0db2	chore(deps-dev): bump @types/node from 22.6.1 to 22.7.0 in /frontend (#4047 )	2024-09-26 12:56:00 +08:00
Engel Nyst	798aaeaef6	remove Exception in the agent (#4054 )	2024-09-26 06:39:17 +02:00
tofarr	0df4b97e5b	Fix startup statuses (#4053 )	2024-09-25 14:38:32 -06:00
Xingyao Wang	81b3cd71b3	[eval] log evaluating warnings directly to console (#4026 )	2024-09-26 03:42:32 +08:00
Robert Brennan	9241ae2148	Fix persistence of "advanced settings" (#4038 )	2024-09-25 12:57:08 -04:00
dependabot[bot]	d3f86e052a	chore(deps-dev): bump llama-index from 0.11.12 to 0.11.13 (#4044 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-25 18:24:09 +02:00
dependabot[bot]	e0c65f8f9c	chore(deps): bump google-generativeai from 0.8.1 to 0.8.2 (#4050 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-25 18:22:01 +02:00
dependabot[bot]	394ab360a8	chore(deps): bump boto3 from 1.35.25 to 1.35.26 (#4048 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-25 18:21:27 +02:00
dependabot[bot]	8a146d5ced	chore(deps): bump litellm from 1.48.0 to 1.48.1 (#4049 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2024-09-25 18:20:47 +02:00
mamoodi	1d052818ae	Set runtime container image so it doesn't need to be rebuilt (#4035 )	2024-09-25 05:20:45 +02:00
tofarr	ee284bae8f	Fix server lock up on session init (#4007 )	2024-09-24 15:49:30 -06:00