add callback logs

Revert "Merge only openhands/events/stream.py from add-event-stream-diagnostics branch"
This reverts commit fb9162ac6b.
2026-04-29 03:00:45 -04:00 · 2025-04-21 15:19:34 -04:00 · 2025-04-21 15:10:43 -04:00 · 2025-04-21 18:52:49 +00:00 · 2025-04-21 18:52:34 +00:00 · 2025-04-21 14:52:19 -04:00
252 changed files with 11038 additions and 3697 deletions
@@ -12,3 +12,6 @@ assignees: ''
 **Describe the UX or technical implementation you have in mind**

 **Additional context**
+
+
+### If you find this feature request or enhancement useful, make sure to add a 👍 to the issue
@@ -1,53 +0,0 @@
-# Workflow that uses the DummyAgent to run a simple task
-name: Run E2E test with dummy agent
-
-# Always run on "main"
-# Always run on PRs
-on:
-  push:
-    branches:
-    - main
-  pull_request:
-
-# If triggered by a PR, it will be in the same group. However, each commit on main will be in its own unique group
-concurrency:
-  group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  test:
-    runs-on: blacksmith-4vcpu-ubuntu-2204
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Install tmux
-        run: sudo apt-get update && sudo apt-get install -y tmux
-      - name: Setup Node.js
-        uses: useblacksmith/setup-node@v5
-        with:
-          node-version: '22.x'
-      - name: Install poetry via pipx
-        run: pipx install poetry
-      - name: Set up Python
-        uses: useblacksmith/setup-python@v6
-        with:
-          python-version: '3.12'
-          cache: 'poetry'
-      - name: Install Python dependencies using Poetry
-        run: poetry install --without evaluation
-      - name: Build Environment
-        run: make build
-      - name: Run tests
-        run: |
-          set -e
-          SANDBOX_FORCE_REBUILD_RUNTIME=True poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
-      - name: Check exit code
-        run: |
-          if [ $? -ne 0 ]; then
-            echo "Test failed"
-            exit 1
-          else
-            echo "Test passed"
-          fi
@@ -174,20 +174,19 @@ jobs:
          build-args: ${{ env.DOCKER_BUILD_ARGS }}
          context: containers/runtime
          provenance: false
-      # Forked repos can't push to GHCR, so we need to upload the image as an artifact
+      # Forked repos can't push to GHCR, so we just build in order to populate the cache for rebuilding
      - name: Build runtime image ${{ matrix.base_image.image }} for fork
        if: github.event.pull_request.head.repo.fork
        uses: useblacksmith/build-push-action@v1
        with:
          tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
-          outputs: type=docker,dest=/tmp/runtime-${{ matrix.base_image.tag }}.tar
          context: containers/runtime
-      - name: Upload runtime image for fork
+      - name: Upload runtime source for fork
        if: github.event.pull_request.head.repo.fork
        uses: actions/upload-artifact@v4
        with:
-          name: runtime-${{ matrix.base_image.tag }}
-          path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
+          name: runtime-src-${{ matrix.base_image.tag }}
+          path: containers/runtime

  verify_hash_equivalence_in_runtime_and_app:
    name: Verify Hash Equivalence in Runtime and Docker images
@@ -258,17 +257,23 @@ jobs:
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
-      # Forked repos can't push to GHCR, so we need to download the image as an artifact
-      - name: Download runtime image for fork
+      - name: Download runtime source for fork
        if: github.event.pull_request.head.repo.fork
        uses: actions/download-artifact@v4
        with:
-          name: runtime-${{ matrix.base_image.tag }}
-          path: /tmp
-      - name: Load runtime image for fork
-        if: github.event.pull_request.head.repo.fork
+          name: runtime-src-${{ matrix.base_image.tag }}
+          path: containers/runtime
+      - name: Lowercase Repository Owner
        run: |
-          docker load --input /tmp/runtime-${{ matrix.base_image.tag }}.tar
+          echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
+      # Forked repos can't push to GHCR, so we need to rebuild using cache
+      - name: Build runtime image ${{ matrix.base_image.image }} for fork
+        if: github.event.pull_request.head.repo.fork
+        uses: useblacksmith/build-push-action@v1
+        with:
+          load: true
+          tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
+          context: containers/runtime
      - name: Cache Poetry dependencies
        uses: useblacksmith/cache@v5
        with:
@@ -287,9 +292,6 @@ jobs:
        run: pipx install poetry
      - name: Install Python dependencies using Poetry
        run: make install-python-dependencies POETRY_GROUP=main,test,runtime INSTALL_PLAYWRIGHT=0
-      - name: Lowercase Repository Owner
-        run: |
-          echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
      - name: Run docker runtime tests
        run: |
          # We install pytest-xdist in order to run tests across CPUs
@@ -327,17 +329,23 @@ jobs:
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3
-      # Forked repos can't push to GHCR, so we need to download the image as an artifact
-      - name: Download runtime image for fork
+      - name: Download runtime source for fork
        if: github.event.pull_request.head.repo.fork
        uses: actions/download-artifact@v4
        with:
-          name: runtime-${{ matrix.base_image.tag }}
-          path: /tmp
-      - name: Load runtime image for fork
-        if: github.event.pull_request.head.repo.fork
+          name: runtime-src-${{ matrix.base_image.tag }}
+          path: containers/runtime
+      - name: Lowercase Repository Owner
        run: |
-          docker load --input /tmp/runtime-${{ matrix.base_image.tag }}.tar
+          echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
+      # Forked repos can't push to GHCR, so we need to rebuild using cache
+      - name: Build runtime image ${{ matrix.base_image.image }} for fork
+        if: github.event.pull_request.head.repo.fork
+        uses: useblacksmith/build-push-action@v1
+        with:
+          load: true
+          tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
+          context: containers/runtime
      - name: Cache Poetry dependencies
        uses: useblacksmith/cache@v5
        with:
@@ -356,9 +364,6 @@ jobs:
        run: pipx install poetry
      - name: Install Python dependencies using Poetry
        run: make install-python-dependencies POETRY_GROUP=main,test,runtime INSTALL_PLAYWRIGHT=0
-      - name: Lowercase Repository Owner
-        run: |
-          echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
      - name: Run runtime tests
        run: |
          # We install pytest-xdist in order to run tests across CPUs
@@ -7,7 +7,7 @@ name: Lint
 on:
  push:
    branches:
-      - main
+    - main
  pull_request:

 # If triggered by a PR, it will be in the same group. However, each commit on main will be in its own unique group
@@ -33,23 +33,9 @@ jobs:
      - name: Lint and TypeScript compilation
        run: |
          cd frontend
-          npm run lint:fix
+          npm run lint
          npm run make-i18n && tsc

-      # Commit and push changes if any as a result of lint:fix
-      - name: Check for changes
-        id: git-check
-        run: |
-          git diff --quiet || echo "changes=true" >> $GITHUB_OUTPUT
-      - name: Commit and push if there are changes
-        if: steps.git-check.outputs.changes == 'true'
-        run: |
-          git config --local user.email "openhands@all-hands.dev"
-          git config --local user.name "OpenHands Bot"
-          git add -A
-          git commit -m "🤖 Auto-fix frontend linting issues"
-          git push
-
  # Run lint on the python code
  lint-python:
    name: Lint python
@@ -16,6 +16,11 @@ on:
        type: string
        default: "main"
        description: "Target branch to pull and create PR against"
+      pr_type:
+        required: false
+        type: string
+        default: "draft"
+        description: "The PR type that is going to be created (draft, ready)"
      LLM_MODEL:
        required: false
        type: string
@@ -280,9 +285,9 @@ jobs:
            cd /tmp && python -m openhands.resolver.send_pull_request \
              --issue-number ${{ env.ISSUE_NUMBER }} \
              --target-branch ${{ env.TARGET_BRANCH }} \
-              --pr-type draft \
+              --pr-type ${{ inputs.pr_type || 'draft' }} \
              --reviewer ${{ github.actor }} | tee pr_result.txt && \
-              grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
+              grep "PR created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
          else
            cd /tmp && python -m openhands.resolver.send_pull_request \
              --issue-number ${{ env.ISSUE_NUMBER }} \
@@ -54,3 +54,20 @@ Frontend:
 ## Template for Github Pull Request

 If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
+
+## Implementation Details
+
+These details may or may not be useful for your current task.
+
+### Frontend
+
+#### Action Handling:
+- Actions are defined in `frontend/src/types/action-type.ts`
+- The `HANDLED_ACTIONS` array in `frontend/src/state/chat-slice.ts` determines which actions are displayed as collapsible UI elements
+- To add a new action type to the UI:
+  1. Add the action type to the `HANDLED_ACTIONS` array
+  2. Implement the action handling in `addAssistantAction` function in chat-slice.ts
+  3. Add a translation key in the format `ACTION_MESSAGE$ACTION_NAME` to the i18n files
+- Actions with `thought` property are displayed in the UI based on their action type:
+  - Regular actions (like "run", "edit") display the thought as a separate message
+  - Special actions (like "think") are displayed as collapsible elements only
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
 To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
 setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.

-Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.32-nikolaik`
+Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.33-nikolaik`

 ## Develop inside Docker container

@@ -175,7 +175,7 @@ install-pre-commit-hooks:

 lint-backend:
 	@echo "$(YELLOW)Running linters...$(RESET)"
-	@poetry run pre-commit run --files openhands/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
+	@poetry run pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)

 lint-frontend:
 	@echo "$(YELLOW)Running linters for frontend...$(RESET)"
@@ -18,7 +18,7 @@
  <br/>
  <a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
  <a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
-  <a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
+  <a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
  <hr>
 </div>

@@ -27,7 +27,7 @@ Welcome to OpenHands (formerly OpenDevin), a platform for software development a
 OpenHands agents can do anything a human developer can: modify code, run commands, browse the web,
 call APIs, and yes—even copy code snippets from StackOverflow.

-Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start).
+Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or [sign up for OpenHands Cloud](https://app.all-hands.dev) to get started.

 > [!IMPORTANT]
 > Using OpenHands for work? We'd love to chat! Fill out
@@ -36,37 +36,50 @@ Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [

 ![App screenshot](./docs/static/img/screenshot.png)

-## ⚡ Quick Start
+## ☁️ OpenHands Cloud
+The easiest way to get started with OpenHands is on [OpenHands Cloud](https://app.all-hands.dev),
+which comes with $50 in free credits for new users.

-The easiest way to run OpenHands is in Docker.
+## 💻 Running OpenHands Locally
+
+OpenHands can also run on your local system using Docker.
 See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installation) guide for
 system requirements and more information.

+> [!WARNING]
+> On a public network? See our [Hardened Docker Installation Guide](https://docs.all-hands.dev/modules/usage/runtimes/docker#hardened-docker-installation)
+> to secure your deployment by restricting network binding and implementing additional security measures.
+
+
 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```

-> [!WARNING]
-> On a public network? See our [Hardened Docker Installation](https://docs.all-hands.dev/modules/usage/runtimes/docker#hardened-docker-installation) guide
-> to secure your deployment by restricting network binding and implementing additional security measures.
-
 You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!

-Finally, you'll need a model provider and API key.
+When you open the application, you'll be asked to choose an LLM provider and add an API key.
 [Anthropic's Claude 3.5 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-5-sonnet-20241022`)
 works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms).

---
+## 💡 Other ways to run OpenHands
+
+> [!CAUTION]
+> OpenHands is meant to be run by a single user on their local workstation.
+> It is not appropriate for multi-tenant deployments where multiple users share the same instance. There is no built-in authentication, isolation, or scalability.
+>
+> If you're interested in running OpenHands in a multi-tenant environment, please
+> [get in touch with us](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
+> for advanced deployment options.

 You can also [connect OpenHands to your local filesystem](https://docs.all-hands.dev/modules/usage/runtimes/docker#connecting-to-your-filesystem),
 run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
@@ -75,14 +88,6 @@ or run it on tagged issues with [a github action](https://docs.all-hands.dev/mod

 Visit [Running OpenHands](https://docs.all-hands.dev/modules/usage/installation) for more information and setup instructions.

-> [!CAUTION]
-> OpenHands is meant to be run by a single user on their local workstation.
-> It is not appropriate for multi-tenant deployments where multiple users share the same instance. There is no built-in isolation or scalability.
->
-> If you're interested in running OpenHands in a multi-tenant environment, please
-> [get in touch with us](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
-> for advanced deployment options.
-
 If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).

 Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help.
@@ -216,13 +216,13 @@ model = "gpt-4o"
 [agent]

 # Whether the browsing tool is enabled
-codeact_enable_browsing = true
+enable_browsing = true

 # Whether the LLM draft editor is enabled
-codeact_enable_llm_editor = false
+enable_llm_editor = false

 # Whether the IPython tool is enabled
-codeact_enable_jupyter = true
+enable_jupyter = true

 # LLM config group to use
 #llm_config = 'your-llm-config-group'
@@ -11,7 +11,7 @@ services:
      - BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
      - SANDBOX_API_HOSTNAME=host.docker.internal
      #
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.32-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.33-nikolaik}
      - SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
@@ -7,7 +7,7 @@ services:
    image: openhands:latest
    container_name: openhands-app-${DATE:-}
    environment:
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik}
      #- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
@@ -354,12 +354,12 @@ Les options de configuration de l'agent sont définies dans les sections `[agent
  - Valeur par défaut : `true`
  - Description : Si l'appel de fonction est activé

- `codeact_enable_browsing`
+- `enable_browsing`
  - Type : `bool`
  - Valeur par défaut : `false`
  - Description : Si le délégué de navigation est activé dans l'espace d'action (fonctionne uniquement avec l'appel de fonction)

- `codeact_enable_llm_editor`
+- `enable_llm_editor`
  - Type : `bool`
  - Valeur par défaut : `false`
  - Description : Si l'éditeur LLM est activé dans l'espace d'action (fonctionne uniquement avec l'appel de fonction)
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -61,7 +61,7 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.cli
 ```

@@ -21,14 +21,18 @@ OpenHands fournit un mode Interface Graphique (GUI) convivial pour interagir ave
 3. Entrez la `Clé API` correspondante pour le fournisseur choisi.
 4. Cliquez sur "Enregistrer" pour appliquer les paramètres.

-### Configuration du Jeton GitHub
+### Jetons de Contrôle de Version
+
+OpenHands prend en charge plusieurs fournisseurs de contrôle de version. Vous pouvez configurer des jetons pour plusieurs fournisseurs simultanément.
+
+#### Configuration du Jeton GitHub

 OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s'il est disponible. Cela peut se produire de deux manières :

 1. **Localement (OSS)** : L'utilisateur saisit directement son jeton GitHub
 2. **En ligne (SaaS)** : Le jeton est obtenu via l'authentification OAuth GitHub

-#### Configuration d'un Jeton GitHub Local
+##### Configuration d'un Jeton GitHub Local

 1. **Générer un Personal Access Token (PAT)** :
   - Allez dans Paramètres GitHub > Paramètres développeur > Personal Access Tokens > Tokens (classique)
@@ -40,11 +44,11 @@ OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s

 2. **Entrer le Jeton dans OpenHands** :
   - Cliquez sur le bouton Paramètres (icône d'engrenage) en haut à droite
-   - Accédez à la section "GitHub"
+   - Accédez à la section "Git Provider Settings"
   - Collez votre jeton dans le champ "Jeton GitHub"
   - Cliquez sur "Enregistrer" pour appliquer les modifications

-#### Politiques de Jetons Organisationnels
+##### Politiques de Jetons Organisationnels

 Si vous travaillez avec des dépôts organisationnels, une configuration supplémentaire peut être nécessaire :

@@ -59,7 +63,7 @@ Si vous travaillez avec des dépôts organisationnels, une configuration supplé
   - Si nécessaire, cliquez sur "Activer SSO" à côté de votre organisation
   - Terminez le processus d'autorisation SSO

-#### Authentification OAuth (Mode En Ligne)
+##### Authentification OAuth (Mode En Ligne)

 Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :

@@ -74,7 +78,7 @@ Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :
   - Autorisez OpenHands à accéder à votre compte GitHub
   - Si vous utilisez une organisation, autorisez l'accès à l'organisation si vous y êtes invité

-#### Dépannage
+##### Dépannage

 Problèmes courants et solutions :

@@ -95,6 +99,43 @@ Problèmes courants et solutions :
   - Vérifiez la console du navigateur pour tout message d'erreur
   - Utilisez le bouton "Tester la connexion" dans les paramètres s'il est disponible

+#### Configuration du Jeton GitLab
+
+OpenHands exporte automatiquement un `GITLAB_TOKEN` vers l'environnement shell, uniquement pour les installations locales, s'il est disponible.
+
+##### Configuration d'un Jeton GitLab
+
+1. **Générer un Personal Access Token (PAT)** :
+   - Sur GitLab, allez dans Paramètres utilisateur > Jetons d'accès
+   - Créez un nouveau jeton avec les portées suivantes :
+     - `api` (Accès API)
+     - `read_user` (Lecture des informations utilisateur)
+     - `read_repository` (Lecture du dépôt)
+     - `write_repository` (Écriture du dépôt)
+   - Définissez une date d'expiration ou laissez vide pour un jeton sans expiration
+
+2. **Entrer le Jeton dans OpenHands** :
+   - Cliquez sur le bouton Paramètres (icône d'engrenage)
+   - Accédez à la section `Git Provider Settings`
+   - Collez votre jeton dans le champ `Jeton GitLab`
+   - Si vous utilisez GitLab auto-hébergé, entrez l'URL de votre instance GitLab
+   - Cliquez sur `Enregistrer les modifications` pour appliquer les changements
+
+##### Dépannage
+
+Problèmes courants et solutions :
+
+1. **Jeton Non Reconnu** :
+   - Assurez-vous que le jeton est correctement enregistré dans les paramètres
+   - Vérifiez que le jeton n'a pas expiré
+   - Vérifiez que le jeton a les portées requises
+   - Pour les instances auto-hébergées, vérifiez l'URL correcte de l'instance
+
+2. **Accès Refusé** :
+   - Vérifiez les permissions d'accès au projet
+   - Vérifiez si le jeton possède les portées nécessaires
+   - Pour les dépôts de groupe/organisation, assurez-vous d'avoir les accès appropriés
+
 ### Paramètres Avancés

 1. Basculez sur `Options Avancées` pour accéder aux paramètres supplémentaires.
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -56,6 +56,6 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
@@ -13,16 +13,16 @@
 La façon la plus simple d'exécuter OpenHands est avec Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```

 Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
@@ -348,12 +348,12 @@ dockerコマンドで使用する場合は、`-e LLM_<option>`として渡しま
  - デフォルト値: `true`
  - 説明: 関数呼び出しが有効かどうか

- `codeact_enable_browsing`
+- `enable_browsing`
  - 型: `bool`
  - デフォルト値: `false`
  - 説明: アクションスペースでブラウジングデリゲートが有効かどうか（関数呼び出しでのみ機能）

- `codeact_enable_llm_editor`
+- `enable_llm_editor`
  - 型: `bool`
  - デフォルト値: `false`
  - 説明: アクションスペースでLLMエディタが有効かどうか（関数呼び出しでのみ機能）
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.cli
 ```

@@ -16,7 +16,11 @@ OpenHandsは、AI アシスタントとやり取りするためのグラフィ
 3. 選択したプロバイダーに対応する`API Key`を入力します。
 4. `Save Changes`をクリックして設定を適用します。

-### GitHubトークンの設定
+### バージョン管理トークン
+
+OpenHandsは複数のバージョン管理プロバイダーをサポートしています。複数のプロバイダーのトークンを同時に設定できます。
+
+#### GitHubトークンの設定

 OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環境にエクスポートします。これは2つの方法で行われます。

@@ -34,7 +38,7 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
     - Minimal Permissions（検索用に**Meta Data = Read-only**を選択し、ブランチ作成用に**Pull Requests = Read and Write**、**Content = Read and Write**を選択します）
  2. **OpenHandsにトークンを入力**:
   - 設定ボタン（歯車アイコン）をクリックします。
-   - `GitHub Settings`セクションに移動します。
+   - `Git Provider Settings`セクションに移動します。
   - `GitHub Token`フィールドにトークンを貼り付けます。
   - `Save Changes`をクリックして変更を適用します。
 </details>
@@ -94,6 +98,46 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
   - 組織を使用している場合は、プロンプトが表示されたら組織へのアクセスを承認します。
 </details>

+#### GitLabトークンの設定
+
+OpenHandsは、利用可能な場合、ローカルインストールのみ、自動的に`GITLAB_TOKEN`をシェル環境にエクスポートします。
+
+<details>
+  <summary>GitLabトークンの設定</summary>
+
+  1. **Personal Access Token（PAT）の生成**:
+   - GitLabで、User Settings > Access Tokensに移動します。
+   - 以下のスコープを持つ新しいトークンを作成します:
+     - `api`（APIアクセス）
+     - `read_user`（ユーザー情報の読み取り）
+     - `read_repository`（リポジトリ読み取り）
+     - `write_repository`（リポジトリ書き込み）
+   - 有効期限を設定するか、無期限トークンの場合は空白のままにします。
+  2. **OpenHandsにトークンを入力**:
+   - 設定ボタン（歯車アイコン）をクリックします。
+   - `Git Provider Settings`セクションに移動します。
+   - `GitLab Token`フィールドにトークンを貼り付けます。
+   - セルフホスト型GitLabを使用している場合は、GitLabインスタンスのURLを入力します。
+   - `Save Changes`をクリックして変更を適用します。
+</details>
+
+<details>
+  <summary>トラブルシューティング</summary>
+
+  一般的な問題と解決策:
+
+  - **トークンが認識されない**:
+     - トークンが設定に正しく保存されていることを確認します。
+     - トークンの有効期限が切れていないことを確認します。
+     - トークンに必要なスコープがあることを確認します。
+     - セルフホスト型インスタンスの場合は、正しいインスタンスURLを確認します。
+
+  - **アクセスが拒否された**:
+     - プロジェクトのアクセス権限を確認します。
+     - トークンに必要なスコープがあるかどうかを確認します。
+     - グループ/組織のリポジトリの場合は、適切なアクセス権があることを確認します。
+</details>
+
 ### 高度な設定

 1. 設定ページ内で、`Advanced`オプションを切り替えて追加の設定にアクセスします。
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -42,7 +42,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.main -t "write a bash script that prints hi"
 ```

@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー

    ```bash
    docker run # ...
-        -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+        -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
        -e SANDBOX_USER_ID=$(id -u) \
        -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
        -v $WORKSPACE_BASE:/opt/workspace_base \
@@ -82,5 +82,5 @@ docker network create openhands-network
 # 分離されたネットワークで OpenHands を実行
 docker run # ... \
    --network openhands-network \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```
@@ -292,17 +292,17 @@ As opções de configuração do agente são definidas nas seções `[agent]` e
  - Padrão: `true`
  - Descrição: Se a chamada de função está habilitada

- `codeact_enable_browsing`
+- `enable_browsing`
  - Tipo: `bool`
  - Padrão: `false`
  - Descrição: Se o delegado de navegação está habilitado no espaço de ação (funciona apenas com chamada de função)

- `codeact_enable_llm_editor`
+- `enable_llm_editor`
  - Tipo: `bool`
  - Padrão: `false`
  - Descrição: Se o editor LLM está habilitado no espaço de ação (funciona apenas com chamada de função)

- `codeact_enable_jupyter`
+- `enable_jupyter`
  - Tipo: `bool`
  - Padrão: `false`
  - Descrição: Se o Jupyter está habilitado no espaço de ação
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.cli
 ```

@@ -17,7 +17,11 @@ O OpenHands fornece um modo de Interface Gráfica do Usuário (GUI) para interag
 3. Insira a `Chave de API` correspondente para o provedor escolhido.
 4. Clique em `Salvar Alterações` para aplicar as configurações.

-### Configuração do Token do GitHub
+### Tokens de Controle de Versão
+
+O OpenHands suporta múltiplos provedores de controle de versão. Você pode configurar tokens para vários provedores simultaneamente.
+
+#### Configuração do Token do GitHub

 O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se ele estiver disponível. Isso pode acontecer de duas maneiras:

@@ -35,7 +39,7 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
     - Minimal Permissions (Selecione **Meta Data = Read-only** para pesquisa, **Pull Requests = Read and Write**, **Content = Read and Write** para criação de branches)
  2. **Insira o Token no OpenHands**:
   - Clique no botão Settings (ícone de engrenagem).
-   - Navegue até a seção `GitHub Settings`.
+   - Navegue até a seção `Git Provider Settings`.
   - Cole seu token no campo `GitHub Token`.
   - Clique em `Save Changes` para aplicar as alterações.
 </details>
@@ -95,6 +99,46 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
   - Se estiver usando uma organização, autorize o acesso à organização se solicitado.
 </details>

+#### Configuração do Token do GitLab
+
+O OpenHands exporta automaticamente um `GITLAB_TOKEN` para o ambiente shell, apenas para instalações locais, se ele estiver disponível.
+
+<details>
+  <summary>Configurando um Token do GitLab</summary>
+
+  1. **Gere um Personal Access Token (PAT)**:
+   - No GitLab, vá para User Settings > Access Tokens.
+   - Crie um novo token com os seguintes escopos:
+     - `api` (Acesso à API)
+     - `read_user` (Leitura de informações do usuário)
+     - `read_repository` (Leitura do repositório)
+     - `write_repository` (Escrita no repositório)
+   - Defina uma data de expiração ou deixe em branco para um token sem expiração.
+  2. **Insira o Token no OpenHands**:
+   - Clique no botão Settings (ícone de engrenagem).
+   - Navegue até a seção `Git Provider Settings`.
+   - Cole seu token no campo `GitLab Token`.
+   - Se estiver usando GitLab auto-hospedado, insira a URL da sua instância GitLab.
+   - Clique em `Save Changes` para aplicar as alterações.
+</details>
+
+<details>
+  <summary>Solução de Problemas</summary>
+
+  Problemas comuns e soluções:
+
+  - **Token Não Reconhecido**:
+     - Certifique-se de que o token esteja salvo corretamente nas configurações.
+     - Verifique se o token não expirou.
+     - Verifique se o token possui os escopos necessários.
+     - Para instâncias auto-hospedadas, verifique a URL correta da instância.
+
+  - **Acesso Negado**:
+     - Verifique as permissões de acesso ao projeto.
+     - Verifique se o token possui os escopos necessários.
+     - Para repositórios de grupo/organização, certifique-se de ter o acesso adequado.
+</details>
+
 ### Configurações Avançadas

 1. Dentro da página Settings, ative as opções `Advanced` para acessar configurações adicionais.
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.main -t "escreva um script bash que imprima oi"
 ```

@@ -58,17 +58,17 @@
 A maneira mais fácil de executar o OpenHands é no Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```

 Você encontrará o OpenHands em execução em http://localhost:3000!
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
@@ -349,17 +349,17 @@ Agent 配置选项在 `config.toml` 文件的 `[agent]` 和 `[agent.<agent_name>
  - 默认值: `true`
  - 描述: 是否启用函数调用

- `codeact_enable_browsing`
+- `enable_browsing`
  - 类型: `bool`
  - 默认值: `false`
  - 描述: 是否在 action space 中启用浏览代理(仅适用于函数调用)

- `codeact_enable_llm_editor`
+- `enable_llm_editor`
  - 类型: `bool`
  - 默认值: `false`
  - 描述: 是否在 action space 中启用 LLM 编辑器(仅适用于函数调用)

- `codeact_enable_jupyter`
+- `enable_jupyter`
  - 类型: `bool`
  - 默认值: `false`
  - 描述: 是否在 action space 中启用 Jupyter
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.cli
 ```

@@ -19,14 +19,18 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用
 3. 输入所选提供商对应的 `API Key`。
 4. 点击"保存"应用设置。

-### GitHub Token 设置
+### 版本控制令牌
+
+OpenHands 支持多个版本控制提供商。您可以同时配置多个提供商的令牌。
+
+#### GitHub Token 设置

 如果可用，OpenHands 会自动将 `GITHUB_TOKEN` 导出到 shell 环境中。这可以通过两种方式实现：

 1. **本地（OSS）**：用户直接输入他们的 GitHub token
 2. **在线（SaaS）**：通过 GitHub OAuth 身份验证获取 token

-#### 设置本地 GitHub Token
+##### 设置本地 GitHub Token

 1. **生成个人访问令牌（PAT）**：
   - 转到 GitHub 设置 > 开发者设置 > 个人访问令牌 > 令牌（经典）
@@ -38,11 +42,11 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用

 2. **在 OpenHands 中输入令牌**：
   - 点击右上角的设置按钮（齿轮图标）
-   - 导航到"GitHub"部分
+   - 导航到"Git Provider Settings"部分
   - 将令牌粘贴到"GitHub Token"字段中
   - 点击"保存"应用更改

-#### 组织令牌策略
+##### 组织令牌策略

 如果您使用组织仓库，可能需要额外的设置：

@@ -57,7 +61,7 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用
   - 如果需要，点击组织旁边的"启用 SSO"
   - 完成 SSO 授权过程

-#### OAuth 身份验证（在线模式）
+##### OAuth 身份验证（在线模式）

 在在线模式下使用 OpenHands 时，GitHub OAuth 流程：

@@ -72,7 +76,7 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用
   - 授权 OpenHands 访问您的 GitHub 帐户
   - 如果使用组织，在出现提示时授权组织访问

-#### 故障排除
+##### 故障排除

 常见问题和解决方案：

@@ -93,6 +97,43 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用
   - 检查浏览器控制台中是否有任何错误消息
   - 如果可用，使用设置中的"测试连接"按钮

+#### GitLab Token 设置
+
+OpenHands 会自动将 `GITLAB_TOKEN` 导出到 shell 环境中，仅适用于本地安装，如果它可用的话。
+
+##### 设置 GitLab Token
+
+1. **生成个人访问令牌（PAT）**：
+   - 在 GitLab 中，转到用户设置 > 访问令牌
+   - 创建具有以下范围的新令牌：
+     - `api`（API 访问）
+     - `read_user`（读取用户信息）
+     - `read_repository`（读取仓库）
+     - `write_repository`（写入仓库）
+   - 设置过期日期或留空以获取永不过期的令牌
+
+2. **在 OpenHands 中输入令牌**：
+   - 点击设置按钮（齿轮图标）
+   - 导航到 `Git Provider Settings` 部分
+   - 将令牌粘贴到 `GitLab Token` 字段中
+   - 如果使用自托管 GitLab，请输入您的 GitLab 实例 URL
+   - 点击 `Save Changes` 应用更改
+
+##### 故障排除
+
+常见问题和解决方案：
+
+1. **令牌无法识别**：
+   - 确保令牌已正确保存在设置中
+   - 检查令牌是否已过期
+   - 验证令牌是否具有所需的范围
+   - 对于自托管实例，验证正确的实例 URL
+
+2. **访问被拒绝**：
+   - 验证项目访问权限
+   - 检查令牌是否具有必要的范围
+   - 对于组/组织仓库，确保您拥有适当的访问权限
+
 ### 高级设置

 1. 切换`高级选项`以访问其他设置。
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -57,6 +57,6 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
@@ -11,16 +11,16 @@
 在 Docker 中运行 OpenHands 是最简单的方式。

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```

 你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands，作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode)，或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
@@ -11,7 +11,7 @@

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
@@ -19,7 +19,7 @@ After visiting OpenHands Cloud, you will be asked to connect with your GitHub ac
 ### Adding Repository Access

 You can grant OpenHands specific repository access:
-1. Click the `Select a GitHub project` dropdown, select `Add more repositories...`.
+1. Click the `Select a Git project` dropdown, select `Add more repositories...`.
 2. Select the organization, then choose the specific repositories to grant OpenHands access to.
   <details>
     <summary>Permission Details for Repository Access</summary>
@@ -45,7 +45,7 @@ You can grant OpenHands specific repository access:
 ### Modifying Repository Access

 You can modify repository access at any time by:
-* Using the same `Select a GitHub project > Add more repositories` workflow, or
+* Using the same `Select a Git project > Add more repositories` workflow, or
 * Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.

 ## Conversation Persistence
@@ -291,17 +291,17 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
  - Default: `true`
  - Description: Whether function calling is enabled

- `codeact_enable_browsing`
+- `enable_browsing`
  - Type: `bool`
  - Default: `false`
  - Description: Whether browsing delegate is enabled in the action space (only works with function calling)

- `codeact_enable_llm_editor`
+- `enable_llm_editor`
  - Type: `bool`
  - Default: `false`
  - Description: Whether LLM editor is enabled in the action space (only works with function calling)

- `codeact_enable_jupyter`
+- `enable_jupyter`
  - Type: `bool`
  - Default: `false`
  - Description: Whether Jupyter is enabled in the action space
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -45,8 +45,11 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.cli
 ```

 This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands.
+
+The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
+permissions. This prevents the agent from creating root-owned files in the mounted workspace.
@@ -1,7 +1,8 @@
 # Custom Sandbox

 :::note
-This guide is for users that would like to use their own custom Docker image for the runtime, e.g. with certain tools or programming languages pre-installed
+This guide is for users that would like to use their own custom Docker image for the runtime, for example,
+one with certain tools or programming languages pre-installed.
 :::

 The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
@@ -18,11 +18,14 @@ OpenHands provides a Graphical User Interface (GUI) mode for interacting with th
 3. Enter the corresponding `API Key` for your chosen provider.
 4. Click `Save Changes` to apply the settings.

-### GitHub Token Setup
+### Version Control Tokens

-OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
+OpenHands supports multiple version control providers. You can configure tokens for multiple providers simultaneously.
+
+#### GitHub Token Setup
+
+OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if provided:

-**Local Installation**: The user directly inputs their GitHub token.
 <details>
  <summary>Setting Up a GitHub Token</summary>

@@ -36,9 +39,8 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
     - Minimal Permissions ( Select `Meta Data = Read-only` read for search, `Pull Requests = Read and Write` and `Content = Read and Write` for branch creation)
  2. **Enter Token in OpenHands**:
   - Click the Settings button (gear icon).
-   - Navigate to the `GitHub Settings` section.
   - Paste your token in the `GitHub Token` field.
-   - Click `Save Changes` to apply the changes.
+   - Click `Save` to apply the changes.
 </details>

 <details>
@@ -79,21 +81,43 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
     - Check the browser console for any error messages.
 </details>

-**OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
+#### GitLab Token Setup
+
+OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if provided:

 <details>
-  <summary>OAuth Authentication</summary>
+  <summary>Setting Up a GitLab Token</summary>

-  When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
-   - Repository access (read/write)
-   - Workflow management
-   - Organization read access
+  1. **Generate a Personal Access Token (PAT)**:
+   - On GitLab, go to User Settings > Access Tokens.
+   - Create a new token with the following scopes:
+     - `api` (API access)
+     - `read_user` (Read user information)
+     - `read_repository` (Read repository)
+     - `write_repository` (Write repository)
+   - Set an expiration date or leave it blank for a non-expiring token.
+  2. **Enter Token in OpenHands**:
+   - Click the Settings button (gear icon).
+   - Paste your token in the `GitLab Token` field.
+   - Enter your GitLab instance URL if using self-hosted GitLab.
+   - Click `Save` to apply the changes.
+</details>

-  To authenticate OpenHands:
-   - Click `Sign in with GitHub` when prompted.
-   - Review the requested permissions.
-   - Authorize OpenHands to access your GitHub account.
-   - If using an organization, authorize organization access if prompted.
+<details>
+  <summary>Troubleshooting</summary>
+
+  Common issues and solutions:
+
+  - **Token Not Recognized**:
+     - Ensure the token is properly saved in settings.
+     - Check that the token hasn't expired.
+     - Verify the token has the required scopes.
+     - For self-hosted instances, verify the correct instance URL.
+
+  - **Access Denied**:
+     - Verify project access permissions.
+     - Check if the token has the necessary scopes.
+     - For group/organization repositories, ensure you have proper access.
 </details>

 ### Advanced Settings
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -43,10 +43,13 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.33 \
    python -m openhands.core.main -t "write a bash script that prints hi"
 ```

+The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
+permissions. This prevents the agent from creating root-owned files in the mounted workspace.
+
 ## Advanced Headless Configurations

 To view all available configuration options for headless mode, run the Python command with the `--help` flag.
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
 The easiest way to run OpenHands is in Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.32
+    docker.all-hands.dev/all-hands-ai/openhands:0.33
 ```

 You'll find OpenHands running at http://localhost:3000!
@@ -12,6 +12,9 @@
  * Modify files
  * Upload and download files

+### Terminal
+- A space for OpenHands and users to run terminal commands.
+
 ### Jupyter
 - Shows all Python commands that were executed by OpenHands.
 - Particularly handy when using OpenHands to perform data visualization tasks.
@@ -23,6 +26,3 @@
 ### Browser
 - Used by OpenHands to browse websites.
 - The browser is non-interactive.
-
-### Terminal
- A space for OpenHands and users to run terminal commands.
@@ -17,6 +17,8 @@ Based on these findings and community feedback, the following models have been v
 - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
 - [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
 - [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
+- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
+- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
 - [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)


@@ -35,8 +35,8 @@ A useful feature is the ability to connect to your local filesystem. To mount yo
 Be careful! There's nothing stopping the OpenHands agent from deleting or modifying
 any files that are mounted into its workspace.

-This setup can cause some issues with file permissions (hence the `SANDBOX_USER_ID` variable)
-but seems to work well on most systems.
+The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
+permissions. This prevents the agent from creating root-owned files in the mounted workspace.

 ## Hardened Docker Installation

@@ -20,3 +20,18 @@ Try these in order:
 * If using Docker Desktop, ensure `Settings > Advanced > Allow the default Docker socket to be used` is enabled.
 * Depending on your configuration you may need `Settings > Resources > Network > Enable host networking` enabled in Docker Desktop.
 * Reinstall Docker Desktop.
+
+### Permission Error
+
+**Description**
+
+On initial prompt, an error is seen with `Permission Denied` or `PermissionError`.
+
+**Resolution**
+
+* Check if the `~/.openhands-state` directory is owned by `root`. If so, you can:
+  * Change the directory's ownership: `sudo chown <user>:<user> ~/.openhands-state`.
+  * or update permissions on the directory: `sudo chmod 777 ~/.openhands-state`.
+  * or delete it if you don’t need previous data. OpenHands will recreate it. You'll need to re-enter LLM settings.
+* If mounting a local directory, ensure your `WORKSPACE_BASE` has the necessary permissions for the user running
+  OpenHands.
@@ -131,9 +131,9 @@ def get_config(
        )
    )
    agent_config = AgentConfig(
-        codeact_enable_jupyter=False,
-        codeact_enable_browsing=RUN_WITH_BROWSING,
-        codeact_enable_llm_editor=False,
+        enable_jupyter=False,
+        enable_browsing=RUN_WITH_BROWSING,
+        enable_llm_editor=False,
    )
    config.set_agent_config(agent_config)
    return config
@@ -79,8 +79,8 @@ def get_config(
    agent_config.enable_prompt_extensions = False
    agent_config = AgentConfig(
        function_calling=False,
-        codeact_enable_jupyter=True,
-        codeact_enable_browsing_delegate=True,
+        enable_jupyter=True,
+        enable_browsing=True,
    )
    config.set_agent_config(agent_config)
    return config
@@ -0,0 +1 @@
+config.yaml
@@ -0,0 +1,35 @@
+# CI Builds Repair Benchmark Integration
+
+This module integrates the CI Builds Repair benchmark developed by [JetBrains-Research](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark).
+
+For more information, refer to the [GitHub repository](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark) and the associated [research paper](https://arxiv.org/abs/2406.11612).
+See notice below for details
+
+## Setup
+
+Before running any scripts, make sure to configure the benchmark by setting up `config.yaml`.
+This benchmark pushes to JetBrains' private GitHub repository. You will need to request a `token_gh` from their team to run this benchmark.
+
+## Inference
+
+To run inference with your model:
+
+```bash
+./evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh llm.yourmodel
+```
+
+## Evaluation
+
+To evaluate the predictions:
+
+```bash
+./evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh predictions_path_containing_output
+```
+
+## Results
+The benchmark contains 68 instances. We skip instances #126 and #145 due to dockerization errors and only run the remaining 66 instances.
+
+Because it runs on live GitHub machines, the benchmark is sensitive to the date on which it is run. Even the golden patches in the dataset might fail due to updates.
+For example, on 2025-04-09, running the benchmark against the golden patches gave 57/67 successes, with 1 job left in the waiting list.
+
+On 2025-04-10, running the full benchmark with OpenHands (OH) and no oracle, 37 instances succeeded. That is 54% of the complete set of 68 instances and 64% of the 57 that succeed with golden patches.
@@ -0,0 +1,11 @@
+LCA_PATH: path # where to clone the lca-ci repo
+model_name: OpenHands
+benchmark_owner: ICML-25-BenchName-builds-repair
+token_gh: your_token
+# for lca-ci-repo
+repos_folder: /path/to/repos # here the cloned repos would be stored
+out_folder: /out/folder # here the result files would be stored
+data_cache_dir: /data/cache/dir/ # here the cached dataset would be stored
+username_gh: username-gh # your GitHub username
+# test_username: test_user # username that would be displayed in the benchmark. Optional. If omitted, username_gh would be used
+language: Python # dataset language (now only Python is available)
@@ -0,0 +1,242 @@
+"""Implements evaluation on JetBrains CI builds repair baselines
+
+Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
+and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
+
+TODOs:
+- Add more flags
+"""
+
+import json
+import os
+from pathlib import Path
+
+import ruamel.yaml
+
+from evaluation.utils.shared import (
+    EvalMetadata,
+    get_default_sandbox_config_for_eval,
+    make_metadata,
+)
+from openhands.core.config import (
+    AppConfig,
+    LLMConfig,
+    get_parser,
+    load_app_config,
+)
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.main import create_runtime
+from openhands.events.action import CmdRunAction
+from openhands.events.observation import CmdOutputObservation
+from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
+
+
+def get_config(
+    metadata: EvalMetadata,
+) -> AppConfig:
+    """Assemble the application config used to evaluate predictions.
+
+    Args:
+        metadata: evaluation metadata; its ``agent_class``, ``max_iterations``
+            and ``llm_config`` attributes are read.
+
+    Returns:
+        An ``AppConfig`` for the ``docker`` runtime with no workspace mounted,
+        the metadata's LLM config installed and prompt extensions disabled on
+        the agent config of ``metadata.agent_class``.
+    """
+    sandbox = get_default_sandbox_config_for_eval()
+    sandbox.base_container_image = 'python:3.12-bookworm'
+
+    app_settings = {
+        'default_agent': metadata.agent_class,
+        'run_as_openhands': False,
+        'runtime': 'docker',
+        'max_iterations': metadata.max_iterations,
+        'sandbox': sandbox,
+        # do not mount workspace
+        'workspace_base': None,
+        'workspace_mount_path': None,
+    }
+    app_config = AppConfig(**app_settings)
+    app_config.set_llm_config(metadata.llm_config)
+
+    # Prompt extensions are switched off on the selected agent's config.
+    app_config.get_agent_config(metadata.agent_class).enable_prompt_extensions = False
+    return app_config
+
+
+# Application config loaded at import time; the ``__main__`` block rebinds
+# ``config`` to the result of ``get_config``.
+config = load_app_config()
+
+
+def load_bench_config():
+    """Read the benchmark settings from the ``config.yaml`` next to this script.
+
+    Returns:
+        The document produced by ruamel's round-trip (``typ='rt'``) loader.
+    """
+    # Resolve the file relative to this module rather than the working directory.
+    config_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'config.yaml'
+    )
+    parser = ruamel.yaml.YAML(typ='rt')
+    with open(config_path, 'r') as handle:
+        loaded = parser.load(handle)
+    return loaded
+
+
+# Benchmark settings, loaded once at import time.
+bench_config = load_bench_config()
+
+
+def _run_cmd(runtime, command, check=False, log_as=None):
+    """Run one shell command through ``runtime`` and return its observation.
+
+    Args:
+        runtime: runtime whose ``run_action`` executes the command.
+        command: shell command line to run.
+        check: when True, assert that the command exited with status 0.
+        log_as: optional text to log in place of the action; use it for
+            commands that embed secrets.
+    """
+    action = CmdRunAction(command=command)
+    logger.info(action if log_as is None else log_as, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    if check:
+        assert obs.exit_code == 0
+    return obs
+
+
+def run_eval(
+    runtime: Runtime,
+):
+    """Run the evaluation and create report
+
+    Clones the lca-baselines repository under ``bench_config['LCA_PATH']``,
+    switches to the ``open-hands-integration`` branch, copies this module's
+    ``config.yaml`` into the benchmark directory, installs dependencies with
+    poetry and runs ``run_eval_jobs.py`` on ``/tmp/output_lca.jsonl``.
+
+    Args:
+        runtime: connected runtime in which all commands are executed.
+
+    Returns:
+        The output of ``cat <out_folder>/testfile.jsonl`` as a string.
+
+    Raises:
+        AssertionError: if creating/entering the directories, cloning the
+            repository or switching the branch exits with a non-zero status.
+    """
+    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
+    obs: CmdOutputObservation
+
+    # NOTE(review): LCA_PATH is presumably absolute -- the second `cd` below
+    # uses a path that is prefixed with it again. Confirm against config.yaml.
+    lca_path = bench_config['LCA_PATH']
+    lca_ci_path = os.path.join(
+        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
+    )
+
+    model_name = bench_config['model_name']
+
+    # -p: an already existing directory must not abort the evaluation.
+    _run_cmd(runtime, f'mkdir -p {lca_path}', check=True)
+    _run_cmd(runtime, f'cd {lca_path}', check=True)
+
+    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
+    _run_cmd(runtime, f'git clone {lca_repo_url}', check=True)
+    _run_cmd(runtime, f'cd {lca_ci_path}', check=True)
+    _run_cmd(runtime, 'git switch open-hands-integration', check=True)
+
+    script_dir = os.path.dirname(
+        os.path.abspath(__file__)
+    )  # Get the absolute path of the script
+    config_path = os.path.join(script_dir, 'config.yaml')
+    runtime.copy_to(config_path, lca_ci_path)
+
+    # Log a placeholder instead of the action so the GitHub token is not
+    # written to the logs.
+    token_gh = bench_config['token_gh']
+    _run_cmd(
+        runtime,
+        f'export TOKEN_GH={token_gh}',
+        log_as='export TOKEN_GH=<redacted>',
+    )
+
+    obs = _run_cmd(runtime, 'poetry install')
+    if getattr(obs, 'exit_code', None) != 0:
+        # Best effort as before, but make the failure visible in the logs.
+        logger.warning(f'poetry install did not exit cleanly: {obs.content}')
+
+    # Set up the task environment
+    commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl"  > /tmp/single_output.txt'
+    # The exit code is deliberately not asserted here.
+    obs = _run_cmd(runtime, commandf)
+    logger.info(f'run_eval_jobs.py gave {obs.content} !')
+
+    commandf = 'cat /tmp/single_output.txt'
+    obs = _run_cmd(runtime, commandf)
+    logger.info(f' {commandf} gave {obs.content}!')
+
+    testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
+    obs = _run_cmd(runtime, f'cat {testfile_path}')
+    report_str = obs.content
+
+    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
+    return report_str
+
+
+def process_predictions(predictions_path: str):
+    """Extract the ``test_result`` of every prediction into a sibling JSONL file.
+
+    Args:
+        predictions_path: path of the predictions file; must end in ``.jsonl``.
+
+    Returns:
+        Path (as ``str``) of the written file, named ``<stem>_lca.jsonl`` and
+        located next to the input. Each input record yields one output line
+        holding its ``test_result`` value (``null`` when the key is absent or
+        the record is not a JSON object).
+
+    Raises:
+        ValueError: if ``predictions_path`` does not have a ``.jsonl`` suffix.
+    """
+    output_path = Path(predictions_path)
+    if output_path.suffix != '.jsonl':
+        raise ValueError('output_path must end in .jsonl')
+
+    output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
+
+    with output_path.open() as infile, output_lca_path.open('w') as outfile:
+        for line in infile:
+            if not line.strip():
+                # Blank lines (e.g. a trailing newline) are not records.
+                continue
+            data = json.loads(line)
+            # Only JSON objects can carry a `test_result` key.
+            result = data.get('test_result') if isinstance(data, dict) else None
+            json.dump(result, outfile)
+            outfile.write('\n')
+
+    return str(output_lca_path)
+
+
+if __name__ == '__main__':
+    # Entry point: convert the predictions, run the benchmark inside a fresh
+    # runtime, then write `results.jsonl` and a summary `report.jsonl` into
+    # the predictions directory.
+    parser = get_parser()
+    parser.add_argument(
+        '-s',
+        '--eval-split',
+        type=str,
+        default='test',
+        choices=['test'],
+        help='data split to evaluate on, must be test',
+    )
+    parser.add_argument(
+        '--predictions-path',
+        type=str,
+        help='Path to the directory containing the output.jsonl with the predictions.',
+    )
+    args, _ = parser.parse_known_args()
+
+    # Fail fast: every later step joins file names onto this directory, so a
+    # missing value would otherwise only surface as a TypeError after the
+    # runtime has been started.
+    if not args.predictions_path:
+        parser.error('--predictions-path is required')
+
+    data_split = args.eval_split
+
+    # Placeholder LLM config: it is only handed to make_metadata/get_config.
+    llm_config = LLMConfig(model='dummy_model')
+
+    metadata = make_metadata(
+        llm_config,
+        f'jetbrains-lca-ci--{data_split}',
+        args.agent_cls,
+        args.max_iterations,
+        args.eval_note,
+        args.predictions_path,
+    )
+
+    # prepare image
+    config = get_config(metadata)
+    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+    logger.info('Converting output.jsonl into output_lca.jsonl')
+    predictions_lca_path = process_predictions(
+        os.path.join(args.predictions_path, 'output.jsonl')
+    )
+    runtime.copy_to(predictions_lca_path, '/tmp')
+
+    # get results
+    results_str = run_eval(runtime)
+    results_path = os.path.join(args.predictions_path, 'results.jsonl')
+    with open(results_path, 'w') as file:
+        file.write(results_str)
+    logger.info(f'Saved results to {results_path}')
+
+    # make a summary
+    resolved_instances = []
+    unresolved_instances = []
+    for line in results_str.strip().splitlines():
+        if not line.strip():
+            continue
+        # `results_str` is raw command output, so it may contain lines that
+        # are not JSON records (e.g. an error message from `cat`).
+        try:
+            data = json.loads(line)
+        except json.JSONDecodeError:
+            logger.warning(f'Skipping non-JSON result line: {line!r}')
+            continue
+        if not isinstance(data, dict):
+            logger.warning(f'Skipping non-object result line: {line!r}')
+            continue
+        conclusion = data.get('conclusion')
+        if conclusion == 'success':
+            resolved_instances.append(data)
+        elif conclusion == 'failure':
+            unresolved_instances.append(data)
+
+    completed_instances = resolved_instances + unresolved_instances
+
+    report = {
+        'success': len(resolved_instances),
+        'failure': len(unresolved_instances),
+        'resolved_instances': resolved_instances,
+        'unresolved_instances': unresolved_instances,
+        'completed_instances': completed_instances,
+    }
+
+    print(f'Results: {report}')
+    report_path = os.path.join(args.predictions_path, 'report.jsonl')
+    with open(report_path, 'w') as out_f:
+        out_f.write(json.dumps(report) + '\n')
+
+    logger.info(f'Saved report of results in swebench format to {report_path}')
@@ -0,0 +1,406 @@
+"""Implements inference on JetBrains CI builds repair baselines
+
+Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
+and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
+
+TODOs:
+- Add EXP_NAME
+"""
+
+import asyncio
+import json
+import os
+from typing import Any
+
+import pandas as pd
+import ruamel.yaml
+from datasets import load_dataset
+
+from evaluation.utils.shared import (
+    EvalMetadata,
+    EvalOutput,
+    codeact_user_response,
+    compatibility_for_eval_history_pairs,
+    get_default_sandbox_config_for_eval,
+    make_metadata,
+    prepare_dataset,
+    reset_logger_for_multiprocessing,
+    run_evaluation,
+)
+from openhands.controller.state.state import State
+from openhands.core.config import (
+    AppConfig,
+    get_llm_config_arg,
+    get_parser,
+    load_app_config,
+)
+from openhands.core.logger import openhands_logger as logger
+from openhands.core.main import create_runtime, run_controller
+from openhands.events.action import CmdRunAction, MessageAction
+from openhands.events.observation import CmdOutputObservation
+from openhands.runtime.base import Runtime
+from openhands.utils.async_utils import call_async_from_sync
+
+
+def get_config(
+    metadata: EvalMetadata,
+) -> AppConfig:
+    sandbox_config = get_default_sandbox_config_for_eval()
+    sandbox_config.base_container_image = 'python:3.12-bookworm'
+    config = AppConfig(
+        default_agent=metadata.agent_class,
+        run_as_openhands=False,
+        runtime='docker',
+        max_iterations=metadata.max_iterations,
+        sandbox=sandbox_config,
+        # do not mount workspace
+        workspace_base=None,
+        workspace_mount_path=None,
+    )
+    config.set_llm_config(metadata.llm_config)
+    agent_config = config.get_agent_config(metadata.agent_class)
+    agent_config.enable_prompt_extensions = False
+    return config
+
+
+# App config loaded once at import time. NOTE(review): nothing else in this
+# file reads this module-level name (process_instance builds its own local
+# `config`) - confirm whether the load is still needed.
+config = load_app_config()
+
+
+def load_bench_config():
+    script_dir = os.path.dirname(
+        os.path.abspath(__file__)
+    )  # Get the absolute path of the script
+    config_path = os.path.join(script_dir, 'config.yaml')
+    yaml = ruamel.yaml.YAML(typ='rt')
+    with open(config_path, 'r') as file:
+        return yaml.load(file)
+
+
+# Benchmark settings, parsed once at import time from config.yaml next to this script.
+bench_config = load_bench_config()
+
+# Agent class name -> function passed to run_controller as `fake_user_response_fn`.
+AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
+    'CodeActAgent': codeact_user_response,
+}
+
+# Agent class name -> instruction suffix. Not referenced elsewhere in this file.
+AGENT_CLS_TO_INST_SUFFIX = {
+    'CodeActAgent': 'When you think you have completed the task, please finish the interaction using the "finish" tool.\n'
+}
+
+
+def initialize_runtime(
+    runtime: Runtime,
+    instance: pd.Series,
+):
+    """Initialize the runtime for the agent.
+
+    This function is called before the runtime is used to run the agent.
+    """
+    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
+    obs: CmdOutputObservation
+
+    lca_path = bench_config['LCA_PATH']
+    lca_ci_path = os.path.join(
+        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
+    )
+
+    repo_name = instance['repo_name']
+    repos_path = bench_config['repos_folder']
+    repo_owner = instance['repo_owner']
+    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
+    model_name = bench_config['model_name']
+
+    action = CmdRunAction(command=f'mkdir {lca_path}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command=f'cd {lca_path}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
+    action = CmdRunAction(command=f'git clone {lca_repo_url}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command=f'cd {lca_ci_path}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command='git switch open-hands-integration')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    script_dir = os.path.dirname(
+        os.path.abspath(__file__)
+    )  # Get the absolute path of the script
+    config_path = os.path.join(script_dir, 'config.yaml')
+    with open(config_path, 'r') as file:
+        config_as_text = file.read()
+
+    commandf = f"echo '{config_as_text}' > config.yaml"
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+
+    token_gh = bench_config['token_gh']
+    commandf = f'export TOKEN_GH={token_gh}'
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+
+    action = CmdRunAction(command='poetry install')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+
+    # Set up the task environment
+    commandf = f'poetry run python run_get_datapoint.py --model-name {model_name} --id {instance["id"]} > branch_name.txt'
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    if obs.exit_code != 0:
+        print(f'run_get_datapoint.py failed at {instance["id"]} with {obs.content}')
+    assert obs.exit_code == 0
+
+    commandf = 'cat branch_name.txt'
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    bench_config['user_branch_name'] = obs.content
+
+    # Navigate to the task's code path
+    action = CmdRunAction(command=f'cd {repo_path}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+
+    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
+
+
+def complete_runtime(
+    runtime: Runtime,
+    instance: pd.Series,
+) -> dict[str, Any]:
+    """Complete the runtime for the agent.
+
+    This function is called before the runtime is used to run the agent.
+    If you need to do something in the sandbox to get the correctness metric after
+    the agent has run, modify this function.
+    """
+    logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
+    obs: CmdOutputObservation
+
+    model_name = bench_config['model_name']
+
+    lca_path = bench_config['LCA_PATH']
+    lca_ci_path = os.path.join(
+        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
+    )
+
+    user_branch_name = bench_config['user_branch_name']
+
+    token_gh = bench_config['token_gh']
+    commandf = f'export TOKEN_GH={token_gh}'
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+
+    # Navigate to the lca-baseslines scripts path
+    action = CmdRunAction(command=f'cd {lca_ci_path}')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    assert obs.exit_code == 0
+
+    commandf = f'poetry run python run_push_datapoint.py --id {instance["id"]} --model-name {model_name} --user-branch-name {user_branch_name} > single_output.json'
+    logger.info(f'Running push script: {commandf}')
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    # assert obs.exit_code == 0
+
+    commandf = 'cat single_output.json'
+    action = CmdRunAction(command=commandf)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = runtime.run_action(action)
+    result = json.loads(obs.content)
+
+    logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
+
+    return result
+
+
+def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
+    config = get_config(metadata)
+
+    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
+    if reset_logger:
+        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
+        reset_logger_for_multiprocessing(logger, instance['instance_id'], log_dir)
+    else:
+        logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
+
+    repo_name = instance['repo_name']
+    repo_workflow = instance['workflow_path']
+    repo_logs = instance['logs']
+    repos_path = bench_config['repos_folder']
+    repo_owner = instance['repo_owner']
+    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
+
+    # Prepare the task instruction
+    instruction_no_oracle = f"""
+<uploaded_files>
+{repo_path}
+</uploaded_files>
+
+I've uploaded a python code repository in the directory {repo_path}, Consider the following issue:
+
+<issue_description>
+The repository must pass the CI workflow {repo_workflow}.
+but it gave the following error
+{repo_logs}
+</issue_description>
+
+Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
+I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
+Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
+Your task is to make the minimal changes to non-test files in the {repo_path} directory to ensure the <issue_description> is satisfied.
+
+Follow these phases to resolve the issue:
+
+Phase 1. READING: read the problem and reword it in clearer terms
+   1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
+   1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
+   1.3 Explain the problem in clear terms.
+   1.4 Enumerate the steps to reproduce the problem.
+   1.5 Hightlight any best practices to take into account when testing and fixing the issue
+
+Phase 2. RUNNING: install and run the tests on the repository
+   2.1 Follow the readme
+   2.2 Install the environment and anything needed
+   2.2 Iterate and figure out how to run the tests
+
+Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
+   3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
+   3.2 Identify all files related to the problem statement.
+   3.3 Propose the methods and files to fix the issue and explain why.
+   3.4 From the possible file locations, select the most likely location to fix the issue.
+
+Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
+   4.1 Look at existing test files in the repository to understand the test format/structure.
+   4.2 Create a minimal reproduction script that reproduces the located issue.
+   4.3 Run the reproduction script to confirm you are reproducing the issue.
+   4.4 Adjust the reproduction script as necessary.
+
+Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
+   5.1 State clearly what the problem is.
+   5.2 State clearly where the problem is located.
+   5.3 State clearly how the test reproduces the issue.
+   5.4 State clearly the best practices to take into account in the fix.
+   5.5 State clearly how to fix the problem.
+
+Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
+   6.1 Make minimal, focused changes to fix the issue.
+
+Phase 7. VERIFICATION: Test your implementation thoroughly.
+   7.1 Run your reproduction script to verify the fix works.
+   7.2 Add edge cases to your test script to ensure comprehensive coverage.
+   7.3 Run existing tests related to the modified code to ensure you haven't broken anything. Run any tests in the repository related to:
+     7.2.1 The issue you are fixing
+     7.2.2 The files you modified
+     7.2.3 The functions you changed
+   7.4 If any tests fail, revise your implementation until all tests pass
+
+Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
+   8.1 Ensure you've fully addressed all requirements.
+
+Once all phases are done, announce: 'Agent Task Complete'.
+Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
+"""
+    runtime = create_runtime(config)
+    call_async_from_sync(runtime.connect)
+    initialize_runtime(runtime, instance)
+
+    # Run the agent
+    state: State | None = asyncio.run(
+        run_controller(
+            config=config,
+            initial_user_action=MessageAction(content=instruction_no_oracle),
+            runtime=runtime,
+            fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
+                metadata.agent_class
+            ),
+        )
+    )
+    assert state is not None
+    metrics = state.metrics.get() if state.metrics else {}
+
+    test_result = complete_runtime(runtime, instance)
+
+    # history is now available as a stream of events, rather than list of pairs of (Action, Observation)
+    # for compatibility with the existing output format, we can remake the pairs here
+    # remove when it becomes unnecessary
+    histories = compatibility_for_eval_history_pairs(state.history)
+
+    # Save the output
+    output = EvalOutput(
+        instance_id=instance['instance_id'],
+        # instance=instance.to_dict(orient='recorods'),
+        instruction=instruction_no_oracle,
+        metadata=metadata,
+        history=histories,
+        test_result=test_result,
+        metrics=metrics,
+    )
+    return output
+
+
+if __name__ == '__main__':
+    parser = get_parser()
+    parser.add_argument(
+        '-s',
+        '--eval-split',
+        type=str,
+        default='test',
+        choices=['test'],
+        help='data split to evaluate on, must be test',
+    )
+    args, _ = parser.parse_known_args()
+
+    data_split = args.eval_split
+
+    bench = load_dataset(
+        'JetBrains-Research/lca-ci-builds-repair', split=data_split
+    ).to_pandas()
+    # todo: see why 126 is giving problems on inference
+    # todo: see why 145 is giving problems on eval
+    bench = bench[bench['id'] != 126]
+    bench = bench[bench['id'] != 145]
+    # bench = bench.iloc[0:56]
+    # add column instnace_id for compatibility with oh repo, old id column must be kept for lca repo
+    bench['instance_id'] = bench['id'].astype(str)
+
+    llm_config = None
+    if args.llm_config:
+        llm_config = get_llm_config_arg(args.llm_config)
+        # modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
+        llm_config.modify_params = False
+    if llm_config is None:
+        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
+
+    metadata = make_metadata(
+        llm_config,
+        f'jetbrains-lca-ci--{data_split}',
+        args.agent_cls,
+        args.max_iterations,
+        args.eval_note,
+        args.eval_output_dir,
+    )
+    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
+    instances = prepare_dataset(bench, output_file, args.eval_n_limit)
+
+    run_evaluation(
+        instances, metadata, output_file, args.eval_num_workers, process_instance
+    )
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+source "evaluation/utils/version_control.sh"
+
+PROCESS_FILEPATH=$1
+if [ -z "$PROCESS_FILEPATH" ]; then
+    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
+    exit 1
+fi
+
+get_openhands_version
+
+PROCESS_FILEPATH=$(realpath $PROCESS_FILEPATH)
+echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
+echo "PROCESS_FILEPATH: $PROCESS_FILEPATH"
+
+EVAL_NOTE="$OPENHANDS_VERSION"
+if [ -n "$EXP_NAME" ]; then
+  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
+fi
+
+function run_eval() {
+  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
+    --predictions-path $PROCESS_FILEPATH "
+
+  echo "RUNNING: $COMMAND"
+  # Run the command
+  eval $COMMAND
+}
+
+unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
+run_eval
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+source "evaluation/utils/version_control.sh"
+
+MODEL_CONFIG=$1
+
+get_openhands_version
+
+echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
+echo "MODEL_CONFIG: $MODEL_CONFIG"
+
+EVAL_NOTE="$OPENHANDS_VERSION"
+if [ -n "$EXP_NAME" ]; then
+  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
+fi
+
+function run_eval() {
+  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
+    --llm-config $MODEL_CONFIG "
+
+  # Run the command
+  eval $COMMAND
+}
+
+#unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
+run_eval
@@ -0,0 +1,60 @@
+"""Installs LCA CI Build Repair benchmark with scripts for OH integration."""
+
+import os
+import shutil
+import subprocess
+
+import yaml
+
+
+def setup():
+    # Read config.yaml
+    print('Reading config.yaml')
+    script_dir = os.path.dirname(
+        os.path.abspath(__file__)
+    )  # Get the absolute path of the script
+    config_path = os.path.join(script_dir, 'config.yaml')
+    with open(config_path, 'r') as f:
+        config = yaml.safe_load(f)
+
+    lca_path = config['LCA_PATH']
+    lca_ci_path = os.path.join(
+        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
+    )
+    repo_url = 'https://github.com/juanmichelini/lca-baselines'
+
+    # Clone the repository to LCA_CI_PATH
+    print(f'Cloning lca-baselines repository from {repo_url} into {lca_path}')
+    result = subprocess.run(
+        ['git', 'clone', repo_url], cwd=lca_path, capture_output=True, text=True
+    )
+    if result.returncode != 0:
+        print(f'Warning cloning repository: {result.stderr}')
+
+    # Clone the repository to LCA_CI_PATH
+    print('Switching branches')
+    result = subprocess.run(
+        ['git', 'switch', 'open-hands-integration'],
+        cwd=lca_ci_path,
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        print(f'Warning switching repository: {result.stderr}')
+
+    # Move and rename config_lca.yaml (overwrite if exists)
+    lca_ci_config_path = os.path.join(lca_ci_path, 'config.yaml')
+    print(f'Copying config.yaml to {lca_ci_config_path}')
+    shutil.copy(config_path, lca_ci_config_path)
+
+    # Run poetry install in LCA_CI_PATH
+    print(f"Running 'poetry install' in {lca_ci_path}")
+    result = subprocess.run(
+        ['poetry', 'install'], cwd=lca_ci_path, capture_output=True, text=True
+    )
+    if result.returncode != 0:
+        print(f'Warning during poetry install: {result.stderr}')
+
+
+# Run the installation only when this file is executed as a script.
+if __name__ == '__main__':
+    setup()
@@ -2,6 +2,8 @@

 This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).

+**UPDATE (4/8/2025): We now support running SWT-Bench evaluation! For more details, check out [the corresponding section](#swt-bench-evaluation).**
+
 **UPDATE (03/27/2025): We now support SWE-Bench multimodal evaluation! Simply use "princeton-nlp/SWE-bench_Multimodal" as the dataset name in the `run_infer.sh` script to evaluate on multimodal instances.**

 **UPDATE (2/18/2025): We now support running SWE-Gym using the same evaluation harness here. For more details, checkout [this README](./SWE-Gym.md).**
@@ -141,7 +143,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
 ./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id] [dataset_name] [split]

 # Example
-./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
+./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
 ```

 The script now accepts optional arguments:
@@ -182,3 +184,58 @@ To clean-up all existing runtimes that you've already started, run:
 ```bash
 ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
 ```
+
+## SWT-Bench Evaluation
+
+[SWT-Bench](https://swtbench.com/) ([paper](https://arxiv.org/abs/2406.12952)) is a benchmark for evaluating the capability of LLMs at creating unit tests. It is performed on the same instances as SWE-Bench, but requires a separate evaluation harness to capture coverage and issue reproduction. We therefore detail below how to leverage the inference script in this folder to run inference on SWT-Bench and how to use the SWT-Bench evaluation harness to evaluate them.
+
+### Run inference on SWT-Bench
+
+To run inference on SWT-Bench, you can use the same `run_infer.sh` script as described for evaluation on plain SWE-Bench. The only difference is that you need to set the `mode` parameter to `swt` or `swt-ci` when running the script. For example, to run inference on SWT-Bench Verified, run the following command:
+
+```bash
+./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [swe-dataset] test 1 swt
+
+# Example - This runs evaluation on CodeActAgent for 500 instances on "SWT-bench_Verified"'s test set (corresponding to SWE-bench_Verified), with a maximum of 100 iterations per instance and 1 worker running in parallel
+./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4o-2024-11-20 HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test 1 swt
+```
+
+The two modes `swt` and `swt-ci` have the following effect:
+- `swt`: This mode will change the prompt to instruct the agent to generate reproducing test cases instead of resolving the issue.
+- `swt-ci`: In addition to the changes by `swt`, this mode sets up the CI environment by i) pre-installing the environment in the docker image, such that the test framework can be executed without errors and ii) telling the model the exact command to run the test framework.
+
+### Run evaluation for SWT-bench
+
+The evaluation of these results is done leveraging [the SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master).
+
+#### Extracting results into SWT-Bench harness format
+In order to run evaluation of the obtained inference results in the SWT-Bench harness, we transform the results to a format that the SWT-Bench evaluation harness expects.
+
+```bash
+python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file [output.jsonl] > [output_swt.jsonl]
+
+# Example
+python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file "evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Verified-test/CodeActAgent/gpt-4o-2024-11-20_maxiter_100_N_v0.31.0-no-hint-swt-run_1/output.jsonl" > OpenHands-gpt-4o-2024-11-20.jsonl
+```
+
+#### Running the results in SWT-Bench
+
+Next, we run the [SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master) with these results.
+First, set up the harness and validate the setup as described [here](https://github.com/logic-star-ai/swt-bench/tree/master?tab=readme-ov-file#-set-up).
+Then, run the evaluation with the following command:
+
+```bash
+# Example
+python3 -m src.main \
+    --dataset_name princeton-nlp/SWE-bench_Verified \
+    --predictions_path <pathTo>/OpenHands-gpt-4o-2024-11-20.jsonl \
+    --max_workers 12 \
+    --run_id OpenHands-CodeAct-gpt-4o-2024-11-20  --patch_types vanilla  --build_mode api
+```
+
+The results of the evaluation can be obtained by running the reporting script of the harness.
+
+```bash
+# Example
+python -m src.report run_instance_swt_logs/OpenHands-CodeAct-gpt-4o-2024-11-20/OpenHands__CodeActAgent__gpt-4o-2024-11-20 --dataset verified
+```
@@ -0,0 +1,842 @@
+# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
+
+# Constants - Installation Specifications
+# Install specification per version string (scikit-learn, going by the
+# constant's name). Each spec holds the Python version, the packages to
+# install, the install command and extra pip packages.
+# Versions 0.20-0.22 share one spec (Python 3.6).
+MAP_VERSION_TO_INSTALL_SKLEARN = {
+    k: {
+        'python': '3.6',
+        'packages': 'numpy scipy cython pytest pandas matplotlib',
+        'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
+        'pip_packages': [
+            'cython',
+            'numpy==1.19.2',
+            'setuptools',
+            'scipy==1.5.2',
+        ],
+    }
+    for k in ['0.20', '0.21', '0.22']
+}
+# Versions 1.3 and 1.4 share a second spec (Python 3.9, pinned packages).
+MAP_VERSION_TO_INSTALL_SKLEARN.update(
+    {
+        k: {
+            'python': '3.9',
+            'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
+            'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
+            'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
+        }
+        for k in ['1.3', '1.4']
+    }
+)
+# Install specification per version string (Flask, going by the constant's
+# name). 2.0 and 2.1 have individual specs with their own pinned pip packages.
+MAP_VERSION_TO_INSTALL_FLASK = {
+    '2.0': {
+        'python': '3.9',
+        'packages': 'requirements.txt',
+        'install': 'python -m pip install -e .',
+        'pip_packages': [
+            'setuptools==70.0.0',
+            'Werkzeug==2.3.7',
+            'Jinja2==3.0.1',
+            'itsdangerous==2.1.2',
+            'click==8.0.1',
+            'MarkupSafe==2.1.3',
+        ],
+    },
+    '2.1': {
+        'python': '3.10',
+        'packages': 'requirements.txt',
+        'install': 'python -m pip install -e .',
+        'pip_packages': [
+            'click==8.1.3',
+            'itsdangerous==2.1.2',
+            'Jinja2==3.1.2',
+            'MarkupSafe==2.1.1',
+            'Werkzeug==2.3.7',
+        ],
+    },
+}
+# Versions 2.2 and 2.3 share one spec (Python 3.11).
+MAP_VERSION_TO_INSTALL_FLASK.update(
+    {
+        k: {
+            'python': '3.11',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'pip_packages': [
+                'click==8.1.3',
+                'itsdangerous==2.1.2',
+                'Jinja2==3.1.2',
+                'MarkupSafe==2.1.1',
+                'Werkzeug==2.3.7',
+            ],
+        }
+        for k in ['2.2', '2.3']
+    }
+)
+# Install specification per version string (Django, going by the constant's
+# name). Versions 1.7-2.2 use Python 3.5, generate the en_US.UTF-8 locale
+# before installing and export locale variables via `eval_commands`.
+MAP_VERSION_TO_INSTALL_DJANGO = {
+    k: {
+        'python': '3.5',
+        'packages': 'requirements.txt',
+        'pre_install': [
+            'apt-get update && apt-get install -y locales',
+            "echo 'en_US UTF-8' > /etc/locale.gen",
+            'locale-gen en_US.UTF-8',
+        ],
+        'install': 'python setup.py install',
+        'pip_packages': ['setuptools'],
+        'eval_commands': [
+            'export LANG=en_US.UTF-8',
+            'export LC_ALL=en_US.UTF-8',
+            'export PYTHONIOENCODING=utf8',
+            'export LANGUAGE=en_US:en',
+        ],
+    }
+    for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
+}
+# Versions 1.4-1.6: only the Python version and the install command are given.
+MAP_VERSION_TO_INSTALL_DJANGO.update(
+    {
+        k: {'python': '3.5', 'install': 'python setup.py install'}
+        for k in ['1.4', '1.5', '1.6']
+    }
+)
+# Versions 3.0-3.2: Python 3.6, locale enabled through `eval_commands`.
+MAP_VERSION_TO_INSTALL_DJANGO.update(
+    {
+        k: {
+            'python': '3.6',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'eval_commands': [
+                "sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
+                'export LANG=en_US.UTF-8',
+                'export LANGUAGE=en_US:en',
+                'export LC_ALL=en_US.UTF-8',
+            ],
+        }
+        for k in ['3.0', '3.1', '3.2']
+    }
+)
+# Version 4.0: Python 3.8.
+MAP_VERSION_TO_INSTALL_DJANGO.update(
+    {
+        k: {
+            'python': '3.8',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+        }
+        for k in ['4.0']
+    }
+)
+# Versions 4.1 and 4.2: Python 3.9.
+MAP_VERSION_TO_INSTALL_DJANGO.update(
+    {
+        k: {
+            'python': '3.9',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+        }
+        for k in ['4.1', '4.2']
+    }
+)
+# Version 5.0: Python 3.11.
+MAP_VERSION_TO_INSTALL_DJANGO.update(
+    {
+        k: {
+            'python': '3.11',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+        }
+        for k in ['5.0']
+    }
+)
+# Install specification per version string (requests, going by the constant's
+# name). Every listed version maps to the same spec (Python 3.9, pytest).
+MAP_VERSION_TO_INSTALL_REQUESTS = {
+    k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'}
+    for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2']
+    + ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17']
+    + ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0']
+}
+# Install specification per version string (seaborn, going by the constant's
+# name). Version 0.11 installs the package itself with pandas pinned to 1.3.5.
+MAP_VERSION_TO_INSTALL_SEABORN = {
+    k: {
+        'python': '3.9',
+        'install': 'python -m pip install -e .',
+        'pip_packages': [
+            'contourpy==1.1.0',
+            'cycler==0.11.0',
+            'fonttools==4.42.1',
+            'importlib-resources==6.0.1',
+            'kiwisolver==1.4.5',
+            'matplotlib==3.7.2',
+            'numpy==1.25.2',
+            'packaging==23.1',
+            'pandas==1.3.5',  # 2.0.3
+            'pillow==10.0.0',
+            'pyparsing==3.0.9',
+            'pytest',
+            'python-dateutil==2.8.2',
+            'pytz==2023.3.post1',
+            'scipy==1.11.2',
+            'six==1.16.0',
+            'tzdata==2023.1',
+            'zipp==3.16.2',
+        ],
+    }
+    for k in ['0.11']
+}
+# Versions 0.12 and 0.13 install with the `[dev]` extra and pin pandas to 2.0.0;
+# the remaining pins are the same as for 0.11.
+MAP_VERSION_TO_INSTALL_SEABORN.update(
+    {
+        k: {
+            'python': '3.9',
+            'install': 'python -m pip install -e .[dev]',
+            'pip_packages': [
+                'contourpy==1.1.0',
+                'cycler==0.11.0',
+                'fonttools==4.42.1',
+                'importlib-resources==6.0.1',
+                'kiwisolver==1.4.5',
+                'matplotlib==3.7.2',
+                'numpy==1.25.2',
+                'packaging==23.1',
+                'pandas==2.0.0',
+                'pillow==10.0.0',
+                'pyparsing==3.0.9',
+                'pytest',
+                'python-dateutil==2.8.2',
+                'pytz==2023.3.post1',
+                'scipy==1.11.2',
+                'six==1.16.0',
+                'tzdata==2023.1',
+                'zipp==3.16.2',
+            ],
+        }
+        for k in ['0.12', '0.13']
+    }
+)
+# Install specification per version string (pytest, going by the constant's
+# name). Every version starts from the same base spec (Python 3.9, editable
+# install); the statements below then attach a version-specific
+# `pip_packages` list to each entry.
+MAP_VERSION_TO_INSTALL_PYTEST = {
+    k: {'python': '3.9', 'install': 'python -m pip install -e .'}
+    for k in [
+        '4.4',
+        '4.5',
+        '4.6',
+        '5.0',
+        '5.1',
+        '5.2',
+        '5.3',
+        '5.4',
+        '6.0',
+        '6.2',
+        '6.3',
+        '7.0',
+        '7.1',
+        '7.2',
+        '7.4',
+        '8.0',
+    ]
+}
+# Pinned pip packages for 4.4
+MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
+    'atomicwrites==1.4.1',
+    'attrs==23.1.0',
+    'more-itertools==10.1.0',
+    'pluggy==0.13.1',
+    'py==1.11.0',
+    'setuptools==68.0.0',
+    'six==1.16.0',
+]
+# Pinned pip packages for 4.5
+MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
+    'atomicwrites==1.4.1',
+    'attrs==23.1.0',
+    'more-itertools==10.1.0',
+    'pluggy==0.11.0',
+    'py==1.11.0',
+    'setuptools==68.0.0',
+    'six==1.16.0',
+    'wcwidth==0.2.6',
+]
+# Pinned pip packages for 4.6
+MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
+    'atomicwrites==1.4.1',
+    'attrs==23.1.0',
+    'more-itertools==10.1.0',
+    'packaging==23.1',
+    'pluggy==0.13.1',
+    'py==1.11.0',
+    'six==1.16.0',
+    'wcwidth==0.2.6',
+]
+# 5.0-5.2 get equal lists (a fresh list object per version)
+for k in ['5.0', '5.1', '5.2']:
+    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
+        'atomicwrites==1.4.1',
+        'attrs==23.1.0',
+        'more-itertools==10.1.0',
+        'packaging==23.1',
+        'pluggy==0.13.1',
+        'py==1.11.0',
+        'wcwidth==0.2.6',
+    ]
+# Pinned pip packages for 5.3
+MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
+    'attrs==23.1.0',
+    'more-itertools==10.1.0',
+    'packaging==23.1',
+    'pluggy==0.13.1',
+    'py==1.11.0',
+    'wcwidth==0.2.6',
+]
+# Pinned pip packages for 5.4
+MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
+    'py==1.11.0',
+    'packaging==23.1',
+    'attrs==23.1.0',
+    'more-itertools==10.1.0',
+    'pluggy==0.13.1',
+]
+# Pinned pip packages for 6.0
+MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
+    'attrs==23.1.0',
+    'iniconfig==2.0.0',
+    'more-itertools==10.1.0',
+    'packaging==23.1',
+    'pluggy==0.13.1',
+    'py==1.11.0',
+    'toml==0.10.2',
+]
+# 6.2 and 6.3 get equal lists
+for k in ['6.2', '6.3']:
+    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
+        'attrs==23.1.0',
+        'iniconfig==2.0.0',
+        'packaging==23.1',
+        'pluggy==0.13.1',
+        'py==1.11.0',
+        'toml==0.10.2',
+    ]
+# Pinned pip packages for 7.0
+MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
+    'attrs==23.1.0',
+    'iniconfig==2.0.0',
+    'packaging==23.1',
+    'pluggy==0.13.1',
+    'py==1.11.0',
+]
+# 7.1 and 7.2 get equal lists
+for k in ['7.1', '7.2']:
+    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
+        'attrs==23.1.0',
+        'iniconfig==2.0.0',
+        'packaging==23.1',
+        'pluggy==0.13.1',
+        'py==1.11.0',
+        'tomli==2.0.1',
+    ]
+# Pinned pip packages for 7.4
+MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
+    'iniconfig==2.0.0',
+    'packaging==23.1',
+    'pluggy==1.3.0',
+    'exceptiongroup==1.1.3',
+    'tomli==2.0.1',
+]
+# Pinned pip packages for 8.0 (same pins as 7.4)
+MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
+    'iniconfig==2.0.0',
+    'packaging==23.1',
+    'pluggy==1.3.0',
+    'exceptiongroup==1.1.3',
+    'tomli==2.0.1',
+]
+MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
+    k: {
+        'python': '3.11',
+        'packages': 'environment.yml',
+        'install': 'python -m pip install -e .',
+        'pre_install': [
+            'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
+        ],
+        'pip_packages': [
+            'contourpy==1.1.0',
+            'cycler==0.11.0',
+            'fonttools==4.42.1',
+            'ghostscript',
+            'kiwisolver==1.4.5',
+            'numpy==1.25.2',
+            'packaging==23.1',
+            'pillow==10.0.0',
+            'pikepdf',
+            'pyparsing==3.0.9',
+            'python-dateutil==2.8.2',
+            'six==1.16.0',
+            'setuptools==68.1.2',
+            'setuptools-scm==7.1.0',
+            'typing-extensions==4.7.1',
+        ],
+    }
+    for k in ['3.5', '3.6', '3.7']
+}
+MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
+    {
+        k: {
+            'python': '3.8',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'pre_install': [
+                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
+            ],
+            'pip_packages': ['pytest', 'ipython'],
+        }
+        for k in ['3.1', '3.2', '3.3', '3.4']
+    }
+)
+MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
+    {
+        k: {
+            'python': '3.7',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'pre_install': [
+                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
+            ],
+            'pip_packages': ['pytest'],
+        }
+        for k in ['3.0']
+    }
+)
+MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(  # 1.x / 2.x: Python 3.5, built through setup.py
+    {
+        k: {
+            'python': '3.5',
+            'install': 'python setup.py build; python setup.py install',
+            'pre_install': [
+                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg'
+            ],
+            'pip_packages': ['pytest'],
+            'execute_test_as_nonroot': True,
+        }
+        for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
+    }
+)
+MAP_VERSION_TO_INSTALL_SPHINX = {
+    k: {
+        'python': '3.9',
+        'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
+        'install': 'python -m pip install -e .[test]',
+        'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
+    }
+    for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
+    + ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
+    + ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
+}
+for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
+    MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(  # cap dependency versions in setup.py
+        [
+            "sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
+            "sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
+            "sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
+            "sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
+            "sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
+            "sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
+        ]
+    )
+    if k in ['4.2', '4.3', '4.4']:  # assumes setup.py already carries the '>=' bounds - TODO confirm
+        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
+            [
+                "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
+                "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
+            ]
+        )
+    elif k == '4.1':  # cover both forms: rewrite the '>=' spec if grep finds it, else the bare name
+        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
+            [
+                (
+                    "grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
+                    "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
+                    "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
+                ),
+                (
+                    "grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
+                    "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
+                    "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
+                ),
+            ]
+        )
+    else:  # remaining versions: add an upper bound to the bare package name
+        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
+            [
+                "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
+                "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
+            ]
+        )
+MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
+    'apt-get update && apt-get install -y graphviz'
+]
+MAP_VERSION_TO_INSTALL_ASTROPY = {
+    k: {
+        'python': '3.9',
+        'install': 'python -m pip install -e .[test] --verbose',
+        'pip_packages': [
+            'attrs==23.1.0',
+            'exceptiongroup==1.1.3',
+            'execnet==2.0.2',
+            'hypothesis==6.82.6',
+            'iniconfig==2.0.0',
+            'numpy==1.25.2',
+            'packaging==23.1',
+            'pluggy==1.3.0',
+            'psutil==5.9.5',
+            'pyerfa==2.0.0.3',
+            'pytest-arraydiff==0.5.0',
+            'pytest-astropy-header==0.2.2',
+            'pytest-astropy==0.10.0',
+            'pytest-cov==4.1.0',
+            'pytest-doctestplus==1.0.0',
+            'pytest-filter-subpackage==0.1.2',
+            'pytest-mock==3.11.1',
+            'pytest-openfiles==0.5.0',
+            'pytest-remotedata==0.4.0',
+            'pytest-xdist==3.3.1',
+            'pytest==7.4.0',
+            'PyYAML==6.0.1',
+            'setuptools==68.0.0',
+            'sortedcontainers==2.4.0',
+            'tomli==2.0.1',
+        ],
+    }
+    for k in ['0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2']
+    + ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']
+}
+for k in ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']:  # pin setuptools in pyproject.toml `requires`
+    MAP_VERSION_TO_INSTALL_ASTROPY[k]['pre_install'] = [
+        'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
+    ]
+MAP_VERSION_TO_INSTALL_SYMPY = {
+    k: {
+        'python': '3.9',
+        'packages': 'mpmath flake8',
+        'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
+        'install': 'python -m pip install -e .',
+    }
+    for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6']
+    + ['1.7', '1.8', '1.9']
+}
+MAP_VERSION_TO_INSTALL_SYMPY.update(
+    {
+        k: {
+            'python': '3.9',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'pip_packages': ['mpmath==1.3.0'],
+        }
+        for k in ['1.13']
+    }
+)
+MAP_VERSION_TO_INSTALL_PYLINT = {
+    k: {
+        'python': '3.9',
+        'packages': 'requirements.txt',
+        'install': 'python -m pip install -e .',
+    }
+    for k in [
+        '2.10',
+        '2.11',
+        '2.13',
+        '2.14',
+        '2.15',
+        '2.16',
+        '2.17',
+        '2.8',
+        '2.9',
+        '3.0',
+    ]
+}
+MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pip_packages'] = ['pyenchant==3.2']
+MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pre_install'] = [
+    'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
+]
+MAP_VERSION_TO_INSTALL_PYLINT.update(
+    {
+        k: {
+            **MAP_VERSION_TO_INSTALL_PYLINT[k],
+            'pip_packages': ['astroid==3.0.0a6', 'setuptools'],
+        }
+        for k in ['3.0']
+    }
+)
+
+MAP_VERSION_TO_INSTALL_XARRAY = {
+    k: {
+        'python': '3.10',
+        'packages': 'environment.yml',
+        'install': 'python -m pip install -e .',
+        'pip_packages': [
+            'numpy==1.23.0',
+            'packaging==23.1',
+            'pandas==1.5.3',
+            'pytest==7.4.0',
+            'python-dateutil==2.8.2',
+            'pytz==2023.3',
+            'six==1.16.0',
+            'scipy==1.11.1',
+            'setuptools==68.0.0',
+        ],
+        'no_use_env': True,
+    }
+    for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09']
+}
+
+MAP_VERSION_TO_INSTALL_SQLFLUFF = {
+    k: {
+        'python': '3.9',
+        'packages': 'requirements.txt',
+        'install': 'python -m pip install -e .',
+    }
+    for k in [
+        '0.10',
+        '0.11',
+        '0.12',
+        '0.13',
+        '0.4',
+        '0.5',
+        '0.6',
+        '0.8',
+        '0.9',
+        '1.0',
+        '1.1',
+        '1.2',
+        '1.3',
+        '1.4',
+        '2.0',
+        '2.1',
+        '2.2',
+    ]
+}
+MAP_VERSION_TO_INSTALL_DBT_CORE = {
+    k: {
+        'python': '3.9',
+        'packages': 'requirements.txt',
+        'install': 'python -m pip install -e .',
+    }
+    for k in [
+        '0.13',
+        '0.14',
+        '0.15',
+        '0.16',
+        '0.17',
+        '0.18',
+        '0.19',
+        '0.20',
+        '0.21',
+        '1.0',
+        '1.1',
+        '1.2',
+        '1.3',
+        '1.4',
+        '1.5',
+        '1.6',
+        '1.7',
+    ]
+}
+MAP_VERSION_TO_INSTALL_PYVISTA = {
+    k: {
+        'python': '3.9',
+        'install': 'python -m pip install -e .',
+        'pip_packages': ['pytest'],
+    }
+    for k in ['0.20', '0.21', '0.22', '0.23']
+}
+MAP_VERSION_TO_INSTALL_PYVISTA.update(
+    {
+        k: {
+            'python': '3.9',
+            'packages': 'requirements.txt',
+            'install': 'python -m pip install -e .',
+            'pip_packages': ['pytest'],
+        }
+        for k in [
+            '0.24',
+            '0.25',
+            '0.26',
+            '0.27',
+            '0.28',
+            '0.29',
+            '0.30',
+            '0.31',
+            '0.32',
+            '0.33',
+            '0.34',
+            '0.35',
+            '0.36',
+            '0.37',
+            '0.38',
+            '0.39',
+            '0.40',
+            '0.41',
+            '0.42',
+            '0.43',
+        ]
+    }
+)
+MAP_VERSION_TO_INSTALL_ASTROID = {
+    k: {
+        'python': '3.9',
+        'install': 'python -m pip install -e .',
+        'pip_packages': ['pytest'],
+    }
+    for k in [
+        '2.10',
+        '2.12',
+        '2.13',
+        '2.14',
+        '2.15',
+        '2.16',
+        '2.5',
+        '2.6',
+        '2.7',
+        '2.8',
+        '2.9',
+        '3.0',
+    ]
+}
+MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
+    k: {
+        'python': '3.9',
+        'install': "python -m pip install -e '.[dev]'",
+    }
+    for k in [
+        '2.18',
+        '2.19',
+        '2.20',
+        '3.0',
+        '3.1',
+        '3.10',
+        '3.11',
+        '3.12',
+        '3.13',
+        '3.15',
+        '3.16',
+        '3.19',
+        '3.2',
+        '3.4',
+        '3.8',
+        '3.9',
+    ]
+}
+MAP_VERSION_TO_INSTALL_PVLIB = {
+    k: {
+        'python': '3.9',
+        'install': 'python -m pip install -e .[all]',
+        'packages': 'pandas scipy',
+        'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
+    }
+    for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
+}
+MAP_VERSION_TO_INSTALL_PYDICOM = {  # base spec (Python 3.6); some versions are overridden below
+    k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
+    for k in [
+        '1.0',
+        '1.1',
+        '1.2',
+        '1.3',
+        '1.4',
+        '2.0',
+        '2.1',
+        '2.2',
+        '2.3',
+        '2.4',
+        '3.0',
+    ]
+}
+MAP_VERSION_TO_INSTALL_PYDICOM.update(  # copy the base spec, replacing only 'python' with 3.8
+    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.8'} for k in ['1.4', '2.0']}
+)
+MAP_VERSION_TO_INSTALL_PYDICOM.update(  # same override pattern, Python 3.9
+    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.9'} for k in ['2.1', '2.2']}
+)
+MAP_VERSION_TO_INSTALL_PYDICOM.update(  # same override pattern, Python 3.10
+    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.10'} for k in ['2.3']}
+)
+MAP_VERSION_TO_INSTALL_PYDICOM.update(  # same override pattern, Python 3.11
+    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.11'} for k in ['2.4', '3.0']}
+)
+MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {'python': '3.9'} for k in ['1.0']}
+MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
+    k: {'python': '3.10', 'packages': 'pytest'} for k in ['0.0.1']
+}
+
+# Constants - Task Instance Installation Environment
+MAP_VERSION_TO_INSTALL = {
+    'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
+    'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
+    'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
+    'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
+    'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
+    'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
+    'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
+    'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
+    'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
+    'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
+    'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
+    'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
+    'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
+    'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
+    'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
+    'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
+    'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
+    'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
+    'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
+    'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
+    'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
+}
+
+# Constants - Repository Specific Installation Instructions
+MAP_REPO_TO_INSTALL = {}
+
+# Constants - Task Instance Test Frameworks (repo -> version -> verbose test command)
+TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
+MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
+    'astropy/astropy': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
+    },
+    'django/django': {
+        k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
+        for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
+    },
+    'marshmallow-code/marshmallow': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
+    },
+    'matplotlib/matplotlib': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
+    },
+    'mwaskom/seaborn': {
+        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
+    },
+    'pallets/flask': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
+    },
+    'psf/requests': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
+    },
+    'pvlib/pvlib-python': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
+    },
+    'pydata/xarray': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
+    },
+    'pydicom/pydicom': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
+    },
+    'pylint-dev/astroid': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
+    },
+    'pylint-dev/pylint': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
+    },
+    'pytest-dev/pytest': {
+        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
+    },
+    'pyvista/pyvista': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
+    },
+    'scikit-learn/scikit-learn': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
+    },
+    'sphinx-doc/sphinx': {
+        k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
+    },
+    'sqlfluff/sqlfluff': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
+    },
+    'swe-bench/humaneval': {
+        k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
+    },
+    'nielstron/humaneval_fix': {
+        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX.keys()
+    },
+    'sympy/sympy': {
+        k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
+    },
+}
+MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
+    './tests/runtests.py --verbosity 2'
+)
@@ -3,7 +3,7 @@ import copy
 import json
 import os
 import tempfile
-from typing import Any
+from typing import Any, Literal

 import pandas as pd
 import toml
@@ -17,6 +17,11 @@ from evaluation.benchmarks.swe_bench.binary_patch_utils import (
 from evaluation.benchmarks.swe_bench.resource.mapping import (
    get_instance_resource_factor,
 )
+from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
+    MAP_REPO_TO_INSTALL,
+    MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE,
+    MAP_VERSION_TO_INSTALL,
+)
 from evaluation.utils.shared import (
    EvalException,
    EvalMetadata,
@@ -55,6 +60,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue

 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
 RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
+BenchMode = Literal['swe', 'swt', 'swt-ci']


 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -68,7 +74,36 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:

 def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
-    instruction = f"""
+    mode = metadata.details['mode']
+    if mode.startswith('swt'):
+        test_instructions = (
+            f'The following command can be used to run the tests: `{list(MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE[instance.repo].values())[0]}`. Make sure they fail in the expected way.\n'
+            if mode.endswith('ci')
+            else ''
+        )
+        instruction = f"""\
+<uploaded_files>
+/workspace/{workspace_dir_name}
+</uploaded_files>
+I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
+
+<issue_description>
+{instance.problem_statement}
+</issue_description>
+
+
+Can you help me implement the necessary changes to the repository to test whether the issue in <issue_description> was resolved?
+I will take care of all changes to any of the non-test files. This means you DON'T have to modify the actual logic and ONLY have to update test logic and tests!
+Your task is to make the minimal changes to tests files in the /workspace directory to reproduce the issue in the <issue_description>, i.e., such that the generated tests fail in the current state (where the issue is unresolved) and pass when the issue will be resolved.
+Follow these steps to reproduce the issue:
+1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+2. Create a script `reproduction.py` to reproduce the error and execute it with `python reproduction.py` using the BashTool, to confirm the error
+3. Edit the sourcecode of the repo to integrate your reproduction script into the test framework
+4. Run the test framework and make sure your tests fail! Only submit FAILING tests! Never submit passing tests.
+{test_instructions}Your thinking should be thorough and so it's fine if it's very long.
+"""
+    else:
+        instruction = f"""
 <uploaded_files>
 /workspace/{workspace_dir_name}
 </uploaded_files>
@@ -96,7 +131,7 @@ Phase 1. READING: read the problem and reword it in clearer terms
 Phase 2. RUNNING: install and run the tests on the repository
   2.1 Follow the readme
   2.2 Install the environment and anything needed
-   2.2 Iterate and figure out how to run the tests 
+   2.2 Iterate and figure out how to run the tests

 Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
   3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
@@ -225,9 +260,9 @@ def get_config(
        )
    )
    agent_config = AgentConfig(
-        codeact_enable_jupyter=False,
-        codeact_enable_browsing=RUN_WITH_BROWSING,
-        codeact_enable_llm_editor=False,
+        enable_jupyter=False,
+        enable_browsing=RUN_WITH_BROWSING,
+        enable_llm_editor=False,
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
@@ -238,6 +273,7 @@ def get_config(
 def initialize_runtime(
    runtime: Runtime,
    instance: pd.Series,  # this argument is not required
+    metadata: EvalMetadata,
 ):
    """Initialize the runtime for the agent.

@@ -355,6 +391,30 @@ def initialize_runtime(
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')

+    if metadata.details['mode'] == 'swt-ci':
+        # set up repo
+        setup_commands = []
+        if instance['repo'] in MAP_REPO_TO_INSTALL:
+            setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
+
+        # Run pre-install set up if provided
+        install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
+            instance['version'], []
+        )
+        if 'pre_install' in install:
+            for pre_install in install['pre_install']:
+                setup_commands.append(pre_install)
+
+        if 'install' in install:
+            setup_commands.append(install['install'])
+
+        for command in setup_commands:
+            action = CmdRunAction(command=command)
+            action.set_hard_timeout(600)
+            logger.info(action, extra={'msg_type': 'ACTION'})
+            obs = runtime.run_action(action)
+            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+
    if 'multimodal' not in metadata.dataset.lower():
        # Only for non-multimodal datasets, we need to activate the testbed environment for Python
        # SWE-Bench multimodal datasets are not using the testbed environment
@@ -577,7 +637,7 @@ def process_instance(
    call_async_from_sync(runtime.connect)

    try:
-        initialize_runtime(runtime, instance)
+        initialize_runtime(runtime, instance, metadata)

        message_action = get_instruction(instance, metadata)

@@ -677,6 +737,13 @@ if __name__ == '__main__':
        default='test',
        help='split to evaluate on',
    )
+    parser.add_argument(
+        '--mode',
+        type=str,
+        default='swe',
+        choices=['swe', 'swt', 'swt-ci'],
+        help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
+    )
    args, _ = parser.parse_known_args()

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -713,7 +780,7 @@ if __name__ == '__main__':
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

-    details = {}
+    details = {'mode': args.mode}
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_descrption = (
@@ -863,7 +930,7 @@ if __name__ == '__main__':
                    # Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else)
                    if (
                        instance['instance_id'] not in added_instance_ids
-                        and instance['test_result']['git_patch'].strip()
+                        and instance['test_result'].get('git_patch', '').strip()
                    ):
                        fout.write(line)
                        added_instance_ids.add(instance['instance_id'])
@@ -12,6 +12,7 @@ NUM_WORKERS=$6
 DATASET=$7
 SPLIT=$8
 N_RUNS=$9
+MODE=${10}

 if [ -z "$NUM_WORKERS" ]; then
  NUM_WORKERS=1
@@ -45,6 +46,11 @@ if [ -z "$SPLIT" ]; then
  SPLIT="test"
 fi

+if [ -z "$MODE" ]; then
+  MODE="swe"
+  echo "MODE not specified, use default $MODE"
+fi
+
 export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
 echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

@@ -55,6 +61,10 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "DATASET: $DATASET"
 echo "SPLIT: $SPLIT"
+echo "MAX_ITER: $MAX_ITER"
+echo "NUM_WORKERS: $NUM_WORKERS"
+echo "COMMIT_HASH: $COMMIT_HASH"
+echo "MODE: $MODE"

 # Default to NOT use Hint
 if [ -z "$USE_HINT_TEXT" ]; then
@@ -74,9 +84,13 @@ fi
 if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
 fi
+# if mode != swe, add mode to the eval note
+if [ "$MODE" != "swe" ]; then
+  EVAL_NOTE="${EVAL_NOTE}-${MODE}"
+fi

 function run_eval() {
-  local eval_note=$1
+  local eval_note="${1}"
  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
@@ -84,7 +98,8 @@ function run_eval() {
    --eval-num-workers $NUM_WORKERS \
    --eval-note $eval_note \
    --dataset $DATASET \
-    --split $SPLIT"
+    --split $SPLIT \
+    --mode $MODE"

  if [ -n "$EVAL_LIMIT" ]; then
    echo "EVAL_LIMIT: $EVAL_LIMIT"
@@ -0,0 +1,95 @@
+import argparse
+import json
+import logging
+
+import unidiff
+
+from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
+    MAP_VERSION_TO_INSTALL,
+)
+
+_LOGGER = logging.getLogger(__name__)
+
+
+def remove_setup_files(model_patch: str, instance: dict, delete_setup_changes: bool):
+    """Strip reproduction scripts and, optionally, setup-file edits from a model patch.
+
+    Files whose target path contains a single '/' (top-level files, e.g. reproduction scripts)
+    are always dropped. With delete_setup_changes, setup files named in a `sed` pre_install
+    command for the instance's repo/version are dropped too. Raises ValueError if unparsable.
+    """
+    setup_files = ['setup.py', 'tox.ini', 'pyproject.toml']
+    pre_install = (
+        MAP_VERSION_TO_INSTALL.get(instance['repo'], {})
+        .get(instance['version'], {})
+        .get('pre_install', [])
+    )
+    sed_cmds = [c for c in pre_install if 'sed' in c] if delete_setup_changes else []
+    relevant_files = [f for f in setup_files if any(f in c for c in sed_cmds)]
+    for i in range(10):
+        try:
+            # Apparently outputs.jsonl has .strip() applied, so we try to reconstruct the original patch by adding auxiliary whitespace
+            patch = unidiff.PatchSet(model_patch + i * '\n')
+            break
+        except unidiff.UnidiffParseError:
+            pass
+    else:  # no amount of padding parsed; fail loudly instead of using an unbound `patch`
+        raise ValueError('model_patch could not be parsed as a unified diff')
+
+    to_delete = []
+    for i, file in enumerate(patch):
+        if (
+            any(f in file.source_file for f in relevant_files)
+            or file.target_file.count('/') == 1
+        ):
+            to_delete.append(i)
+    for i in reversed(to_delete):
+        del patch[i]
+    return str(patch)
+
+
+def main(
+    prediction_file: str,
+):
+    """Print one SWT-Bench prediction (a JSON line) per entry of an OpenHands prediction file."""
+    with open(prediction_file) as f:
+        for line in f:
+            pred = json.loads(line)
+            try:
+                git_diff = pred['test_result']['git_patch']
+            except KeyError:
+                _LOGGER.warning(
+                    'Warning: No git diff found for instance %s', pred['instance_id']
+                )
+                continue
+            ci_mode = pred['metadata']['details'].get('mode', '') == 'swt-ci'
+            try:
+                git_diff = remove_setup_files(git_diff, pred['instance'], ci_mode)
+            except Exception:  # keep the unfiltered diff; Ctrl-C / SystemExit still propagate
+                _LOGGER.warning(
+                    'Warning: Invalid git diff found for instance %s',
+                    pred['instance_id'],
+                )
+            print(
+                json.dumps(
+                    {
+                        'instance_id': pred['instance_id'],
+                        'model_name_or_path': f'{pred["metadata"]["llm_config"]["openrouter_app_name"]}__{pred["metadata"]["agent_class"]}__{pred["metadata"]["llm_config"]["model"]}',
+                        'model_patch': git_diff,
+                        'full_output': json.dumps(pred),
+                    }
+                )
+            )
+
+
+if __name__ == '__main__':  # CLI entry point: --prediction_file is passed straight to main()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--prediction_file',
+        type=str,
+        required=True,
+        help='Path to the prediction file (.../outputs.jsonl)',
+    )
+    args = parser.parse_args()
+
+    main(args.prediction_file)
@@ -158,9 +158,9 @@ def get_config(
        )
    )
    agent_config = AgentConfig(
-        codeact_enable_jupyter=False,
-        codeact_enable_browsing=RUN_WITH_BROWSING,
-        codeact_enable_llm_editor=False,
+        enable_jupyter=False,
+        enable_browsing=RUN_WITH_BROWSING,
+        enable_llm_editor=False,
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
@@ -62,9 +62,9 @@ def get_config(
        )
    )
    agent_config = AgentConfig(
-        codeact_enable_jupyter=True,
-        codeact_enable_browsing=True,
-        codeact_enable_llm_editor=False,
+        enable_jupyter=True,
+        enable_browsing=True,
+        enable_llm_editor=False,
    )
    config.set_agent_config(agent_config)
    return config
@@ -1,9 +1,5 @@
 import { describe, it, expect, afterEach, vi } from "vitest";

-import { screen } from "@testing-library/react";
-import { renderWithProviders } from "../../test-utils";
-import { BrowserPanel } from "#/components/features/browser/browser";
-
 // Mock useParams before importing components
 vi.mock("react-router", async () => {
  const actual = await vi.importActual("react-router");
@@ -27,6 +23,10 @@ vi.mock("react-i18next", async () => {
  };
 });

+import { screen } from "@testing-library/react";
+import { renderWithProviders } from "../../test-utils";
+import { BrowserPanel } from "#/components/features/browser/browser";
+
 describe("Browser", () => {
  afterEach(() => {
    vi.clearAllMocks();
@@ -17,7 +17,7 @@ describe("CopyToClipboardButton", () => {
        isDisabled={false}
        onClick={() => {}}
        mode="copy"
-      />,
+      />
    );

    const button = screen.getByTestId("copy-to-clipboard");
@@ -31,7 +31,7 @@ describe("CopyToClipboardButton", () => {
        isDisabled={false}
        onClick={() => {}}
        mode="copied"
-      />,
+      />
    );

    const button = screen.getByTestId("copy-to-clipboard");
@@ -1,8 +1,8 @@
 import { describe, expect, it, vi, beforeEach } from "vitest";
 import { render, screen } from "@testing-library/react";
-import { useSelector } from "react-redux";
 import { ActionSuggestions } from "#/components/features/chat/action-suggestions";
 import { useAuth } from "#/context/auth-context";
+import { useSelector } from "react-redux";

 // Mock dependencies
 vi.mock("posthog-js", () => ({
@@ -24,9 +24,9 @@ vi.mock("react-i18next", () => ({
  useTranslation: () => ({
    t: (key: string) => {
      const translations: Record<string, string> = {
-        ACTION$PUSH_TO_BRANCH: "Push to Branch",
-        ACTION$PUSH_CREATE_PR: "Push & Create PR",
-        ACTION$PUSH_CHANGES_TO_PR: "Push Changes to PR",
+        "ACTION$PUSH_TO_BRANCH": "Push to Branch",
+        "ACTION$PUSH_CREATE_PR": "Push & Create PR",
+        "ACTION$PUSH_CHANGES_TO_PR": "Push Changes to PR"
      };
      return translations[key] || key;
    },
@@ -217,6 +217,17 @@ describe("ChatInput", () => {
    expect(onImagePaste).toHaveBeenCalledWith([file]);
  });

+  it("should use the default maxRows value", () => {
+    // Smoke test only: maxRows is not exposed in the DOM, so this can only
+    // confirm that the component renders when the prop is left at its default.
+    render(<ChatInput onSubmit={onSubmitMock} />);
+    const textarea = screen.getByRole("textbox");
+    expect(textarea).toBeInTheDocument();
+
+    // NOTE(review): nothing here asserts the default of 16 rows; that limit is
+    // presumably enforced inside TextareaAutosize — confirm and cover it there.
+  });
+
  it("should not submit when Enter is pressed during IME composition", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
@@ -3,36 +3,46 @@ import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { AuthModal } from "#/components/features/waitlist/auth-modal";
 import * as CaptureConsent from "#/utils/handle-capture-consent";
+import * as AuthHook from "#/context/auth-context";

 describe("AuthModal", () => {
  beforeAll(() => {
    vi.stubGlobal("location", { href: "" });
+    vi.spyOn(AuthHook, "useAuth").mockReturnValue({
+      providersAreSet: false,
+      setProvidersAreSet: vi.fn(),
+      providerTokensSet: [],
+      setProviderTokensSet: vi.fn()
+    });
  });

  afterAll(() => {
    vi.unstubAllGlobals();
+    vi.restoreAllMocks();
  });

  it("should render a tos checkbox that is unchecked by default", () => {
-    render(<AuthModal githubAuthUrl={null} />);
+    render(<AuthModal githubAuthUrl={null} appMode="saas" />);
    const checkbox = screen.getByRole("checkbox");

    expect(checkbox).not.toBeChecked();
  });

-  it("should only enable the GitHub button if the tos checkbox is checked", async () => {
+  it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
    const user = userEvent.setup();
-    render(<AuthModal githubAuthUrl={null} />);
-    const checkbox = screen.getByRole("checkbox");
-    const button = screen.getByRole("button", {
-      name: "GITHUB$CONNECT_TO_GITHUB",
-    });
+    render(<AuthModal githubAuthUrl={null} appMode="saas" />);

-    expect(button).toBeDisabled();
+    const checkbox = screen.getByRole("checkbox");
+    const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
+    const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
+
+    expect(githubButton).toBeDisabled();
+    expect(gitlabButton).toBeDisabled();

    await user.click(checkbox);

-    expect(button).not.toBeDisabled();
+    expect(githubButton).not.toBeDisabled();
+    expect(gitlabButton).not.toBeDisabled();
  });

  it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
@@ -42,14 +52,12 @@ describe("AuthModal", () => {
    );

    const user = userEvent.setup();
-    render(<AuthModal githubAuthUrl="mock-url" />);
+    render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);

    const checkbox = screen.getByRole("checkbox");
    await user.click(checkbox);

-    const button = screen.getByRole("button", {
-      name: "GITHUB$CONNECT_TO_GITHUB",
-    });
+    const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
    await user.click(button);

    expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
@@ -16,6 +16,8 @@ import { ConversationCard } from "#/components/features/conversation-panel/conve
 import { clickOnEditButton } from "./utils";

 // We'll use the actual i18next implementation but override the translation function
+import { I18nextProvider } from "react-i18next";
+import i18n from "i18next";

 // Mock the t function to return our custom translations
 vi.mock("react-i18next", async () => {
@@ -25,9 +27,9 @@ vi.mock("react-i18next", async () => {
    useTranslation: () => ({
      t: (key: string) => {
        const translations: Record<string, string> = {
-          CONVERSATION$CREATED: "Created",
-          CONVERSATION$AGO: "ago",
-          CONVERSATION$UPDATED: "Updated",
+          "CONVERSATION$CREATED": "Created",
+          "CONVERSATION$AGO": "ago",
+          "CONVERSATION$UPDATED": "Updated"
        };
        return translations[key] || key;
      },
@@ -80,9 +82,7 @@ describe("ConversationCard", () => {
    expect(card).toHaveTextContent("ago");

    // Use a regex to match the time part since it might have whitespace
-    const timeRegex = new RegExp(
-      formatTimeDelta(new Date("2021-10-01T12:00:00Z")),
-    );
+    const timeRegex = new RegExp(formatTimeDelta(new Date("2021-10-01T12:00:00Z")));
    expect(card).toHaveTextContent(timeRegex);
  });

@@ -1,13 +1,19 @@
 import { screen, waitFor, within } from "@testing-library/react";
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
-import { QueryClientConfig } from "@tanstack/react-query";
+import {
+  QueryClientProvider,
+  QueryClient,
+  QueryClientConfig,
+} from "@tanstack/react-query";
 import userEvent from "@testing-library/user-event";
 import { createRoutesStub } from "react-router";
 import React from "react";
-import { renderWithProviders } from "test-utils";
 import { ConversationPanel } from "#/components/features/conversation-panel/conversation-panel";
 import OpenHands from "#/api/open-hands";
+import { AuthProvider } from "#/context/auth-context";
 import { clickOnEditButton } from "./utils";
+import { queryClientConfig } from "#/query-client-config";
+import { renderWithProviders } from "test-utils";

 describe("ConversationPanel", () => {
  const onCloseMock = vi.fn();
@@ -23,9 +29,9 @@ describe("ConversationPanel", () => {
      preloadedState: {
        metrics: {
          cost: null,
-          usage: null,
-        },
-      },
+          usage: null
+        }
+      }
    });

  const { endSessionMock } = vi.hoisted(() => ({
@@ -78,9 +84,7 @@ describe("ConversationPanel", () => {
    vi.clearAllMocks();
    vi.restoreAllMocks();
    // Setup default mock for getUserConversations
-    vi.spyOn(OpenHands, "getUserConversations").mockResolvedValue([
-      ...mockConversations,
-    ]);
+    vi.spyOn(OpenHands, "getUserConversations").mockResolvedValue([...mockConversations]);
  });

  it("should render the conversations", async () => {
@@ -134,9 +138,7 @@ describe("ConversationPanel", () => {
    const cancelButton = screen.getByRole("button", { name: /cancel/i });
    await user.click(cancelButton);

-    expect(
-      screen.queryByRole("button", { name: /cancel/i }),
-    ).not.toBeInTheDocument();
+    expect(screen.queryByRole("button", { name: /cancel/i })).not.toBeInTheDocument();

    // Ensure the conversation is not deleted
    cards = await screen.findAllByTestId("conversation-card");
@@ -149,22 +151,19 @@ describe("ConversationPanel", () => {
    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
    getUserConversationsSpy.mockImplementation(async () => mockData);

-    const deleteUserConversationSpy = vi.spyOn(
-      OpenHands,
-      "deleteUserConversation",
-    );
+    const deleteUserConversationSpy = vi.spyOn(OpenHands, "deleteUserConversation");
    deleteUserConversationSpy.mockImplementation(async (id: string) => {
-      const index = mockData.findIndex((conv) => conv.conversation_id === id);
+      const index = mockData.findIndex(conv => conv.conversation_id === id);
      if (index !== -1) {
        mockData.splice(index, 1);
      }
      // Wait for React Query to update its cache
-      await new Promise((resolve) => setTimeout(resolve, 0));
+      await new Promise(resolve => setTimeout(resolve, 0));
    });

    renderConversationPanel();

-    const cards = await screen.findAllByTestId("conversation-card");
+    let cards = await screen.findAllByTestId("conversation-card");
    const ellipsisButton = within(cards[1]).getByTestId("ellipsis-button");
    await user.click(ellipsisButton);
    const deleteButton = screen.getByTestId("delete-button");
@@ -176,18 +175,13 @@ describe("ConversationPanel", () => {
    const confirmButton = screen.getByRole("button", { name: /confirm/i });
    await user.click(confirmButton);

-    expect(
-      screen.queryByRole("button", { name: /confirm/i }),
-    ).not.toBeInTheDocument();
+    expect(screen.queryByRole("button", { name: /confirm/i })).not.toBeInTheDocument();

    // Wait for the cards to update with a longer timeout
-    await waitFor(
-      () => {
-        const updatedCards = screen.getAllByTestId("conversation-card");
-        expect(updatedCards).toHaveLength(2);
-      },
-      { timeout: 2000 },
-    );
+    await waitFor(() => {
+      const updatedCards = screen.getAllByTestId("conversation-card");
+      expect(updatedCards).toHaveLength(2);
+    }, { timeout: 2000 });

    expect(endSessionMock).toHaveBeenCalledOnce();
  });
@@ -224,12 +218,9 @@ describe("ConversationPanel", () => {
    const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
    getUserConversationsSpy.mockImplementation(async () => mockData);

-    const deleteUserConversationSpy = vi.spyOn(
-      OpenHands,
-      "deleteUserConversation",
-    );
+    const deleteUserConversationSpy = vi.spyOn(OpenHands, "deleteUserConversation");
    deleteUserConversationSpy.mockImplementation(async (id: string) => {
-      const index = mockData.findIndex((conv) => conv.conversation_id === id);
+      const index = mockData.findIndex(conv => conv.conversation_id === id);
      if (index !== -1) {
        mockData.splice(index, 1);
      }
@@ -237,7 +228,7 @@ describe("ConversationPanel", () => {

    renderConversationPanel();

-    const cards = await screen.findAllByTestId("conversation-card");
+    let cards = await screen.findAllByTestId("conversation-card");
    expect(cards).toHaveLength(3);

    const ellipsisButton = within(cards[0]).getByTestId("ellipsis-button");
@@ -251,9 +242,7 @@ describe("ConversationPanel", () => {
    const confirmButton = screen.getByRole("button", { name: /confirm/i });
    await user.click(confirmButton);

-    expect(
-      screen.queryByRole("button", { name: /confirm/i }),
-    ).not.toBeInTheDocument();
+    expect(screen.queryByRole("button", { name: /confirm/i })).not.toBeInTheDocument();

    // Wait for the cards to update
    await waitFor(() => {
@@ -359,9 +348,9 @@ describe("ConversationPanel", () => {
      preloadedState: {
        metrics: {
          cost: null,
-          usage: null,
-        },
-      },
+          usage: null
+        }
+      }
    });

    const toggleButton = screen.getByText("Toggle");
@@ -61,25 +61,25 @@ describe("PaymentForm", () => {
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50.12");
+    await user.type(topUpInput, "50");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);

-    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.12);
+    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
  });

-  it("should round the top-up amount to two decimal places", async () => {
+  it("should only accept integer values", async () => {
    const user = userEvent.setup();
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50.125456");
+    await user.type(topUpInput, "50");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);

-    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.13);
+    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
  });

  it("should disable the top-up button if the user enters an invalid amount", async () => {
@@ -100,7 +100,7 @@ describe("PaymentForm", () => {
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50.12");
+    await user.type(topUpInput, "50");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);
@@ -114,7 +114,7 @@ describe("PaymentForm", () => {
      renderPaymentForm();

      const topUpInput = await screen.findByTestId("top-up-input");
-      await user.type(topUpInput, "-50.12");
+      await user.type(topUpInput, "-50");

      const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
      await user.click(topUpButton);
@@ -139,6 +139,8 @@ describe("PaymentForm", () => {
      const user = userEvent.setup();
      renderPaymentForm();

+      // With type="number", the browser would prevent non-numeric input,
+      // but we'll test the validation logic anyway
      const topUpInput = await screen.findByTestId("top-up-input");
      await user.type(topUpInput, "abc");

@@ -160,5 +162,19 @@ describe("PaymentForm", () => {

      expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
    });
+
+    test("user enters a decimal value", async () => {
+      const actor = userEvent.setup();
+      renderPaymentForm();
+
+      // A fractional amount has to be rejected by the form's own validation,
+      // independent of any step="1" check a browser would apply.
+      const amountField = await screen.findByTestId("top-up-input");
+      await actor.type(amountField, "50.5");
+      await actor.click(screen.getByText("PAYMENT$ADD_CREDIT"));
+
+      // Submitting must not have started a checkout session.
+      expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
+    });
  });
 });
@@ -1,11 +1,5 @@
 import { afterEach, describe, expect, it, vi } from "vitest";

-import { screen } from "@testing-library/react";
-import userEvent from "@testing-library/user-event";
-import { renderWithProviders } from "test-utils";
-import { FeedbackForm } from "#/components/features/feedback/feedback-form";
-import { I18nKey } from "#/i18n/declaration";
-
 // Mock useParams before importing components
 vi.mock("react-router", async () => {
  const actual = await vi.importActual("react-router");
@@ -15,6 +9,12 @@ vi.mock("react-router", async () => {
  };
 });

+import { screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { renderWithProviders } from "test-utils";
+import { FeedbackForm } from "#/components/features/feedback/feedback-form";
+import { I18nKey } from "#/i18n/declaration";
+
 describe("FeedbackForm", () => {
  const user = userEvent.setup();
  const onCloseMock = vi.fn();
@@ -1,8 +1,17 @@
-import { screen } from "@testing-library/react";
-import { describe, it, expect } from "vitest";
-import { renderWithProviders } from "test-utils";
+import { render, screen } from "@testing-library/react";
+import { describe, it, expect, vi } from "vitest";
 import { Messages } from "#/components/features/chat/messages";
 import type { Message } from "#/message";
+import { renderWithProviders } from "test-utils";
+
+// Stub react-router's useParams so rendered components see a conversationId;
+// every other export is passed through from the real module.
+vi.mock("react-router", async () => {
+  const original =
+    await vi.importActual<typeof import("react-router")>("react-router");
+  const useParams = () => ({ conversationId: "test-conversation-id" });
+  return { ...original, useParams };
+});

 describe("File Operations Messages", () => {
  it("should show success indicator for successful file read operation", () => {
@@ -17,9 +26,7 @@ describe("File Operations Messages", () => {
      },
    ];

-    renderWithProviders(
-      <Messages messages={messages} isAwaitingUserConfirmation={false} />,
-    );
+    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);

    const statusIcon = screen.getByTestId("status-icon");
    expect(statusIcon).toBeInTheDocument();
@@ -38,9 +45,7 @@ describe("File Operations Messages", () => {
      },
    ];

-    renderWithProviders(
-      <Messages messages={messages} isAwaitingUserConfirmation={false} />,
-    );
+    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);

    const statusIcon = screen.getByTestId("status-icon");
    expect(statusIcon).toBeInTheDocument();
@@ -59,9 +64,7 @@ describe("File Operations Messages", () => {
      },
    ];

-    renderWithProviders(
-      <Messages messages={messages} isAwaitingUserConfirmation={false} />,
-    );
+    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);

    const statusIcon = screen.getByTestId("status-icon");
    expect(statusIcon).toBeInTheDocument();
@@ -80,9 +83,7 @@ describe("File Operations Messages", () => {
      },
    ];

-    renderWithProviders(
-      <Messages messages={messages} isAwaitingUserConfirmation={false} />,
-    );
+    renderWithProviders(<Messages messages={messages} isAwaitingUserConfirmation={false} />);

    const statusIcon = screen.getByTestId("status-icon");
    expect(statusIcon).toBeInTheDocument();
@@ -1,7 +1,7 @@
+import { ImagePreview } from "#/components/features/images/image-preview";
 import { render, screen } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { describe, expect, it, vi } from "vitest";
-import { ImagePreview } from "#/components/features/images/image-preview";

 describe("ImagePreview", () => {
  it("should render an image", () => {
@@ -1,4 +1,4 @@
-import { render, screen, within } from "@testing-library/react";
+import { render, screen, within, fireEvent } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
 import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
@@ -144,7 +144,7 @@ describe("InteractiveChatBox", () => {
        onStop={onStop}
        onChange={onChange}
        value="test message"
-      />,
+      />
    );

    // Upload an image via the upload button - this should NOT clear the text input
@@ -173,7 +173,7 @@ describe("InteractiveChatBox", () => {
        onStop={onStop}
        onChange={onChange}
        value=""
-      />,
+      />
    );

    // Verify the text input was cleared
@@ -1,9 +1,9 @@
 import { render, screen } from "@testing-library/react";
 import { Provider } from "react-redux";
 import { configureStore } from "@reduxjs/toolkit";
-import { describe, it, expect } from "vitest";
 import { JupyterEditor } from "#/components/features/jupyter/jupyter";
 import { jupyterReducer } from "#/state/jupyter-slice";
+import { vi, describe, it, expect } from "vitest";

 describe("JupyterEditor", () => {
  const mockStore = configureStore({
@@ -36,7 +36,7 @@ describe("JupyterEditor", () => {
        <div style={{ height: "100vh" }}>
          <JupyterEditor maxWidth={800} />
        </div>
-      </Provider>,
+      </Provider>
    );

    const container = screen.getByTestId("jupyter-container");
@@ -5,13 +5,7 @@ import translations from "../../src/i18n/translation.json";
 import { UserAvatar } from "../../src/components/features/sidebar/user-avatar";

 vi.mock("@heroui/react", () => ({
-  Tooltip: ({
-    content,
-    children,
-  }: {
-    content: string;
-    children: React.ReactNode;
-  }) => (
+  Tooltip: ({ content, children }: { content: string; children: React.ReactNode }) => (
    <div>
      {children}
      <div>{content}</div>
@@ -19,33 +13,15 @@ vi.mock("@heroui/react", () => ({
  ),
 }));

-const supportedLanguages = [
-  "en",
-  "ja",
-  "zh-CN",
-  "zh-TW",
-  "ko-KR",
-  "de",
-  "no",
-  "it",
-  "pt",
-  "es",
-  "ar",
-  "fr",
-  "tr",
-];
+const supportedLanguages = ['en', 'ja', 'zh-CN', 'zh-TW', 'ko-KR', 'de', 'no', 'it', 'pt', 'es', 'ar', 'fr', 'tr'];

 // Helper function to check if a translation exists for all supported languages
 function checkTranslationExists(key: string) {
  const missingTranslations: string[] = [];

-  const translationEntry = (
-    translations as Record<string, Record<string, string>>
-  )[key];
+  const translationEntry = (translations as Record<string, Record<string, string>>)[key];
  if (!translationEntry) {
-    throw new Error(
-      `Translation key "${key}" does not exist in translation.json`,
-    );
+    throw new Error(`Translation key "${key}" does not exist in translation.json`);
  }

  for (const lang of supportedLanguages) {
@@ -77,9 +53,7 @@ function findDuplicateKeys(obj: Record<string, any>) {
 vi.mock("react-i18next", () => ({
  useTranslation: () => ({
    t: (key: string) => {
-      const translationEntry = (
-        translations as Record<string, Record<string, string>>
-      )[key];
+      const translationEntry = (translations as Record<string, Record<string, string>>)[key];
      return translationEntry?.ja || key;
    },
  }),
@@ -88,7 +62,7 @@ vi.mock("react-i18next", () => ({
 describe("Landing page translations", () => {
  test("should render Japanese translations correctly", () => {
    // Mock a simple component that uses the translations
-    function TestComponent() {
+    const TestComponent = () => {
      const { t } = useTranslation();
      return (
        <div>
@@ -121,16 +95,14 @@ describe("Landing page translations", () => {
          </div>
        </div>
      );
-    }
+    };

    render(<TestComponent />);

    // Check main content translations
    expect(screen.getByText("開発を始めましょう！")).toBeInTheDocument();
    expect(screen.getByText("VS Codeで開く")).toBeInTheDocument();
-    expect(
-      screen.getByText("テストカバレッジを向上させる"),
-    ).toBeInTheDocument();
+    expect(screen.getByText("テストカバレッジを向上させる")).toBeInTheDocument();
    expect(screen.getByText("Dependabot PRを自動マージ")).toBeInTheDocument();
    expect(screen.getByText("READMEを改善")).toBeInTheDocument();
    expect(screen.getByText("依存関係を整理")).toBeInTheDocument();
@@ -148,12 +120,8 @@ describe("Landing page translations", () => {
    expect(tabs).toHaveTextContent("コードエディタ");

    // Check workspace label and new project button
-    expect(screen.getByTestId("workspace-label")).toHaveTextContent(
-      "ワークスペース",
-    );
-    expect(screen.getByTestId("new-project")).toHaveTextContent(
-      "新規プロジェクト",
-    );
+    expect(screen.getByTestId("workspace-label")).toHaveTextContent("ワークスペース");
+    expect(screen.getByTestId("new-project")).toHaveTextContent("新規プロジェクト");

    // Check status messages
    const status = screen.getByTestId("status");
@@ -191,12 +159,12 @@ describe("Landing page translations", () => {
      "STATUS$CONNECTED_TO_SERVER",
      "TIME$MINUTES_AGO",
      "TIME$HOURS_AGO",
-      "TIME$DAYS_AGO",
+      "TIME$DAYS_AGO"
    ];

    // Check all keys and collect missing translations
    const missingTranslationsMap = new Map<string, string[]>();
-    translationKeys.forEach((key) => {
+    translationKeys.forEach(key => {
      const missing = checkTranslationExists(key);
      if (missing.length > 0) {
        missingTranslationsMap.set(key, missing);
@@ -206,11 +174,8 @@ describe("Landing page translations", () => {
    // If any translations are missing, throw an error with all missing translations
    if (missingTranslationsMap.size > 0) {
      const errorMessage = Array.from(missingTranslationsMap.entries())
-        .map(
-          ([key, langs]) =>
-            `\n- "${key}" is missing translations for: ${langs.join(", ")}`,
-        )
-        .join("");
+        .map(([key, langs]) => `\n- "${key}" is missing translations for: ${langs.join(', ')}`)
+        .join('');
      throw new Error(`Missing translations:${errorMessage}`);
    }
  });
@@ -219,9 +184,7 @@ describe("Landing page translations", () => {
    const duplicates = findDuplicateKeys(translations);

    if (duplicates.length > 0) {
-      throw new Error(
-        `Found duplicate translation keys: ${duplicates.join(", ")}`,
-      );
+      throw new Error(`Found duplicate translation keys: ${duplicates.join(', ')}`);
    }
  });
 });
@@ -5,7 +5,7 @@ import { Command, appendInput, appendOutput } from "#/state/command-slice";
 import Terminal from "#/components/features/terminal/terminal";

 const renderTerminal = (commands: Command[] = []) =>
-  renderWithProviders(<Terminal secrets={[]} />, {
+  renderWithProviders(<Terminal />, {
    preloadedState: {
      cmd: {
        commands,
@@ -121,7 +121,7 @@ describe.skip("Terminal", () => {

  // This test fails because it expects `disposeMock` to have been called before the component is unmounted.
  it.skip("should dispose the terminal on unmount", () => {
-    const { unmount } = renderWithProviders(<Terminal secrets={[]} />);
+    const { unmount } = renderWithProviders(<Terminal />);

    expect(mockTerminal.dispose).not.toHaveBeenCalled();

@@ -1,31 +1,31 @@
 import { beforeAll, describe, expect, it, vi } from "vitest";
-import { render } from "@testing-library/react";
 import { afterEach } from "node:test";
-import { ReactNode } from "react";
 import { useTerminal } from "#/hooks/use-terminal";
 import { Command } from "#/state/command-slice";
+import { AgentState } from "#/types/agent-state";
+import { renderWithProviders } from "../../test-utils";
+
+// Replace the WsClient context hook with a stub that reports a fixed
+// CONNECTED status, no events, and a fresh mock `send` on every call.
+vi.mock("#/context/ws-client-provider", () => {
+  const useWsClient = () => {
+    const send = vi.fn();
+    return { send, status: "CONNECTED", isLoadingMessages: false, events: [] };
+  };
+  return { useWsClient };
+});

 interface TestTerminalComponentProps {
  commands: Command[];
-  secrets: string[];
 }

 function TestTerminalComponent({
  commands,
-  secrets,
 }: TestTerminalComponentProps) {
-  const ref = useTerminal({ commands, secrets, disabled: false });
+  const ref = useTerminal({ commands });
  return <div ref={ref} />;
 }

-interface WrapperProps {
-  children: ReactNode;
-}
-
-function Wrapper({ children }: WrapperProps) {
-  return <div>{children}</div>;
-}
-
 describe("useTerminal", () => {
  const mockTerminal = vi.hoisted(() => ({
    loadAddon: vi.fn(),
@@ -57,8 +57,11 @@ describe("useTerminal", () => {
  });

  it("should render", () => {
-    render(<TestTerminalComponent commands={[]} secrets={[]} />, {
-      wrapper: Wrapper,
+    renderWithProviders(<TestTerminalComponent commands={[]} />, {
+      preloadedState: {
+        agent: { curAgentState: AgentState.RUNNING },
+        cmd: { commands: [] },
+      },
    });
  });

@@ -68,15 +71,19 @@ describe("useTerminal", () => {
      { content: "hello", type: "output" },
    ];

-    render(<TestTerminalComponent commands={commands} secrets={[]} />, {
-      wrapper: Wrapper,
+    renderWithProviders(<TestTerminalComponent commands={commands} />, {
+      preloadedState: {
+        agent: { curAgentState: AgentState.RUNNING },
+        cmd: { commands },
+      },
    });

    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(1, "echo hello");
    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(2, "hello");
  });

-  it("should hide secrets in the terminal", () => {
+  // This test is no longer relevant as secrets filtering has been removed
+  it.skip("should hide secrets in the terminal", () => {
    const secret = "super_secret_github_token";
    const anotherSecret = "super_secret_another_token";
    const commands: Command[] = [
@@ -87,23 +94,18 @@ describe("useTerminal", () => {
      { content: secret, type: "output" },
    ];

-    render(
+    renderWithProviders(
      <TestTerminalComponent
        commands={commands}
-        secrets={[secret, anotherSecret]}
      />,
      {
-        wrapper: Wrapper,
+        preloadedState: {
+          agent: { curAgentState: AgentState.RUNNING },
+          cmd: { commands },
+        },
      },
    );

-    // BUG: `vi.clearAllMocks()` does not clear the number of calls
-    // therefore, we need to assume the order of the calls based
-    // on the test order
-    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(
-      3,
-      `export GITHUB_TOKEN=${"*".repeat(10)},${"*".repeat(10)},${"*".repeat(10)}`,
-    );
-    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(4, "*".repeat(10));
+    // This test is no longer relevant as secrets filtering has been removed
  });
 });
@@ -1,15 +1,12 @@
-import { describe, expect, it } from "vitest";
-import fs from "fs";
-import path from "path";
+import { describe, expect, it } from 'vitest';
+import fs from 'fs';
+import path from 'path';

-describe("translation.json", () => {
-  it("should not have duplicate translation keys", () => {
+describe('translation.json', () => {
+  it('should not have duplicate translation keys', () => {
    // Read the translation.json file
-    const translationPath = path.join(
-      __dirname,
-      "../../src/i18n/translation.json",
-    );
-    const translationContent = fs.readFileSync(translationPath, "utf-8");
+    const translationPath = path.join(__dirname, '../../src/i18n/translation.json');
+    const translationContent = fs.readFileSync(translationPath, 'utf-8');

    // First, let's check for exact string matches of key definitions
    const keyRegex = /"([^"]+)": {/g;
@@ -33,7 +30,7 @@ describe("translation.json", () => {
    if (uniqueDuplicates.length > 0) {
      const errorMessage = `Found duplicate translation keys:\n${uniqueDuplicates
        .map((key) => `  - "${key}" appears ${keyOccurrences.get(key)} times`)
-        .join("\n")}`;
+        .join('\n')}`;
      throw new Error(errorMessage);
    }

@@ -41,13 +38,10 @@ describe("translation.json", () => {
    expect(uniqueDuplicates).toHaveLength(0);
  });

-  it("should have consistent translations for each key", () => {
+  it('should have consistent translations for each key', () => {
    // Read the translation.json file
-    const translationPath = path.join(
-      __dirname,
-      "../../src/i18n/translation.json",
-    );
-    const translationContent = fs.readFileSync(translationPath, "utf-8");
+    const translationPath = path.join(__dirname, '../../src/i18n/translation.json');
+    const translationContent = fs.readFileSync(translationPath, 'utf-8');
    const translations = JSON.parse(translationContent);

    // Create a map to store English translations for each key
@@ -56,7 +50,7 @@ describe("translation.json", () => {

    // Check each key's English translation
    Object.entries(translations).forEach(([key, value]: [string, any]) => {
-      if (typeof value === "object" && value.en !== undefined) {
+      if (typeof value === 'object' && value.en !== undefined) {
        const currentEn = value.en.toLowerCase();
        const existingEn = englishTranslations.get(key)?.toLowerCase();

@@ -71,10 +65,8 @@ describe("translation.json", () => {
    // If there are inconsistencies, create a helpful error message
    if (inconsistentKeys.length > 0) {
      const errorMessage = `Found inconsistent translations for keys:\n${inconsistentKeys
-        .map(
-          (key) => `  - "${key}" has multiple different English translations`,
-        )
-        .join("\n")}`;
+        .map((key) => `  - "${key}" has multiple different English translations`)
+        .join('\n')}`;
      throw new Error(errorMessage);
    }

@@ -65,9 +65,7 @@ describe("Settings Screen", () => {

    await waitFor(() => {
      // Use queryAllByText to handle multiple elements with the same text
-      expect(screen.queryAllByText("SETTINGS$LLM_SETTINGS")).not.toHaveLength(
-        0,
-      );
+      expect(screen.queryAllByText("SETTINGS$LLM_SETTINGS")).not.toHaveLength(0);
      screen.getByText("ACCOUNT_SETTINGS$ADDITIONAL_SETTINGS");
      screen.getByText("BUTTON$RESET_TO_DEFAULTS");
      screen.getByText("BUTTON$SAVE");
@@ -32,11 +32,9 @@ describe("Actions Service", () => {

      handleStatusMessage(message);

-      expect(store.dispatch).toHaveBeenCalledWith(
-        expect.objectContaining({
-          payload: message,
-        }),
-      );
+      expect(store.dispatch).toHaveBeenCalledWith(expect.objectContaining({
+        payload: message,
+      }));
    });

    it("should log error messages and display them in chat", () => {
@@ -55,11 +53,9 @@ describe("Actions Service", () => {
        metadata: { msgId: "runtime.connection.failed" },
      });

-      expect(store.dispatch).toHaveBeenCalledWith(
-        expect.objectContaining({
-          payload: message,
-        }),
-      );
+      expect(store.dispatch).toHaveBeenCalledWith(expect.objectContaining({
+        payload: message,
+      }));
    });
  });

@@ -76,27 +72,21 @@ describe("Actions Service", () => {
          final_thought: "",
          task_completed: "partial",
          outputs: "",
-          thought: "",
-        },
+          thought: ""
+        }
      };

      // Mock implementation to capture the message
      let capturedPartialMessage = "";
      (store.dispatch as any).mockImplementation((action: any) => {
-        if (
-          action.type === "chat/addAssistantMessage" &&
-          action.payload.includes(
-            "believe that the task was **completed partially**",
-          )
-        ) {
+        if (action.type === "chat/addAssistantMessage" &&
+            action.payload.includes("believe that the task was **completed partially**")) {
          capturedPartialMessage = action.payload;
        }
      });

      handleActionMessage(messagePartial);
-      expect(capturedPartialMessage).toContain(
-        "I believe that the task was **completed partially**",
-      );
+      expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");

      // Test not completed
      const messageNotCompleted: ActionMessage = {
@@ -109,25 +99,21 @@ describe("Actions Service", () => {
          final_thought: "",
          task_completed: "false",
          outputs: "",
-          thought: "",
-        },
+          thought: ""
+        }
      };

      // Mock implementation to capture the message
      let capturedNotCompletedMessage = "";
      (store.dispatch as any).mockImplementation((action: any) => {
-        if (
-          action.type === "chat/addAssistantMessage" &&
-          action.payload.includes("believe that the task was **not completed**")
-        ) {
+        if (action.type === "chat/addAssistantMessage" &&
+            action.payload.includes("believe that the task was **not completed**")) {
          capturedNotCompletedMessage = action.payload;
        }
      });

      handleActionMessage(messageNotCompleted);
-      expect(capturedNotCompletedMessage).toContain(
-        "I believe that the task was **not completed**",
-      );
+      expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");

      // Test completed successfully
      const messageCompleted: ActionMessage = {
@@ -140,27 +126,21 @@ describe("Actions Service", () => {
          final_thought: "",
          task_completed: "true",
          outputs: "",
-          thought: "",
-        },
+          thought: ""
+        }
      };

      // Mock implementation to capture the message
      let capturedCompletedMessage = "";
      (store.dispatch as any).mockImplementation((action: any) => {
-        if (
-          action.type === "chat/addAssistantMessage" &&
-          action.payload.includes(
-            "believe that the task was **completed successfully**",
-          )
-        ) {
+        if (action.type === "chat/addAssistantMessage" &&
+            action.payload.includes("believe that the task was **completed successfully**")) {
          capturedCompletedMessage = action.payload;
        }
      });

      handleActionMessage(messageCompleted);
-      expect(capturedCompletedMessage).toContain(
-        "I believe that the task was **completed successfully**",
-      );
+      expect(capturedCompletedMessage).toContain("I believe that the task was **completed successfully**");
    });
  });
 });
@@ -0,0 +1,51 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { handleObservationMessage } from "#/services/observations";
+import store from "#/store";
+import { ObservationMessage } from "#/types/message";
+
+// Mock dependencies
+// Replace the default export of "#/store" with a stub whose `dispatch` is a
+// vi.fn() spy, so the tests below can assert on the actions that get
+// dispatched. Vitest hoists vi.mock calls, so the stub is in place before the
+// imports above are evaluated.
+vi.mock("#/store", () => ({
+  default: {
+    dispatch: vi.fn(),
+  },
+}));
+
+describe("Observations Service", () => {
+  // Clear recorded calls on every mock so each test starts from a clean slate.
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe("handleObservationMessage", () => {
+    // Builds a fresh "error" observation fixture for each test.
+    function createErrorMessage(): ObservationMessage {
+      const text = "The action has not been executed.";
+
+      return {
+        id: 14,
+        timestamp: "2025-04-14T13:37:54.451843",
+        message: text,
+        cause: 12,
+        observation: "error",
+        content: text,
+        extras: {
+          error_id: "",
+          metadata: {},
+        },
+      };
+    }
+
+    it("should dispatch error messages exactly once", () => {
+      // The single action the handler is expected to dispatch for the fixture.
+      const expectedAction = {
+        type: "chat/addAssistantObservation",
+        payload: expect.objectContaining({
+          observation: "error",
+          content: "The action has not been executed.",
+          source: "user",
+          extras: {
+            error_id: "",
+          },
+        }),
+      };
+
+      handleObservationMessage(createErrorMessage());
+
+      expect(store.dispatch).toHaveBeenCalledTimes(1);
+      expect(store.dispatch).toHaveBeenCalledWith(expectedAction);
+    });
+  });
+});
@@ -5,16 +5,15 @@ const mockI18n = {
  language: "ja",
  t: (key: string) => {
    const translations: Record<string, string> = {
-      SUGGESTIONS$TODO_APP: "ToDoリストアプリを開発する",
-      LANDING$BUILD_APP_BUTTON: "プルリクエストを表示するアプリを開発する",
-      SUGGESTIONS$HACKER_NEWS:
-        "Hacker Newsのトップ記事を表示するbashスクリプトを作成する",
-      LANDING$TITLE: "一緒に開発を始めましょう！",
-      OPEN_IN_VSCODE: "VS Codeで開く",
-      INCREASE_TEST_COVERAGE: "テストカバレッジを向上",
-      AUTO_MERGE_PRS: "PRを自動マージ",
-      FIX_README: "READMEを修正",
-      CLEAN_DEPENDENCIES: "依存関係を整理",
+      "SUGGESTIONS$TODO_APP": "ToDoリストアプリを開発する",
+      "LANDING$BUILD_APP_BUTTON": "プルリクエストを表示するアプリを開発する",
+      "SUGGESTIONS$HACKER_NEWS": "Hacker Newsのトップ記事を表示するbashスクリプトを作成する",
+      "LANDING$TITLE": "一緒に開発を始めましょう！",
+      "OPEN_IN_VSCODE": "VS Codeで開く",
+      "INCREASE_TEST_COVERAGE": "テストカバレッジを向上",
+      "AUTO_MERGE_PRS": "PRを自動マージ",
+      "FIX_README": "READMEを修正",
+      "CLEAN_DEPENDENCIES": "依存関係を整理"
    };
    return translations[key] || key;
  },
@@ -24,5 +23,7 @@ const mockI18n = {
 };

 export function I18nTestProvider({ children }: { children: ReactNode }) {
-  return <I18nextProvider i18n={mockI18n as any}>{children}</I18nextProvider>;
+  return (
+    <I18nextProvider i18n={mockI18n as any}>{children}</I18nextProvider>
+  );
 }
@@ -1,6 +1,6 @@
 {
  "name": "openhands-frontend",
-  "version": "0.32.0",
+  "version": "0.33.0",
  "private": true,
  "type": "module",
  "engines": {
@@ -9,27 +9,27 @@
  "dependencies": {
    "@heroui/react": "2.7.6",
    "@monaco-editor/react": "^4.7.0-rc.0",
-    "@react-router/node": "^7.5.0",
-    "@react-router/serve": "^7.5.0",
-    "@react-types/shared": "^3.28.0",
-    "@reduxjs/toolkit": "^2.6.1",
+    "@react-router/node": "^7.5.1",
+    "@react-router/serve": "^7.5.1",
+    "@react-types/shared": "^3.29.0",
+    "@reduxjs/toolkit": "^2.7.0",
    "@stripe/react-stripe-js": "^3.6.0",
-    "@stripe/stripe-js": "^7.0.0",
-    "@tanstack/react-query": "^5.72.1",
-    "@vitejs/plugin-react": "^4.3.2",
+    "@stripe/stripe-js": "^7.1.0",
+    "@tanstack/react-query": "^5.74.4",
+    "@vitejs/plugin-react": "^4.4.0",
    "@xterm/addon-fit": "^0.10.0",
    "@xterm/xterm": "^5.4.0",
    "axios": "^1.8.4",
    "clsx": "^2.1.1",
    "eslint-config-airbnb-typescript": "^18.0.0",
-    "framer-motion": "^12.6.3",
-    "i18next": "^24.2.3",
-    "i18next-browser-languagedetector": "^8.0.4",
+    "framer-motion": "^12.7.4",
+    "i18next": "^25.0.0",
+    "i18next-browser-languagedetector": "^8.0.5",
    "i18next-http-backend": "^3.0.2",
    "isbot": "^5.1.25",
    "jose": "^6.0.10",
    "monaco-editor": "^0.52.2",
-    "posthog-js": "^1.235.0",
+    "posthog-js": "^1.236.2",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "react-highlight": "^0.15.0",
@@ -38,14 +38,14 @@
    "react-icons": "^5.5.0",
    "react-markdown": "^10.1.0",
    "react-redux": "^9.2.0",
-    "react-router": "^7.5.0",
+    "react-router": "^7.5.1",
    "react-syntax-highlighter": "^15.6.1",
    "react-textarea-autosize": "^8.5.9",
    "remark-gfm": "^4.0.1",
    "sirv-cli": "^3.0.1",
    "socket.io-client": "^4.8.1",
    "tailwind-merge": "^3.2.0",
-    "vite": "^6.2.5",
+    "vite": "^6.3.2",
    "web-vitals": "^3.5.2",
    "ws": "^8.18.1"
  },
@@ -79,16 +79,16 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@mswjs/socket.io-binding": "^0.1.1",
-    "@playwright/test": "^1.51.1",
-    "@react-router/dev": "^7.5.0",
+    "@playwright/test": "^1.52.0",
+    "@react-router/dev": "^7.5.1",
    "@tailwindcss/typography": "^0.5.16",
-    "@tanstack/eslint-plugin-query": "^5.72.1",
+    "@tanstack/eslint-plugin-query": "^5.73.3",
    "@testing-library/dom": "^10.4.0",
    "@testing-library/jest-dom": "^6.6.1",
    "@testing-library/react": "^16.3.0",
    "@testing-library/user-event": "^14.6.1",
-    "@types/node": "^22.14.0",
-    "@types/react": "^19.1.0",
+    "@types/node": "^22.14.1",
+    "@types/react": "^19.1.2",
    "@types/react-dom": "^19.1.1",
    "@types/react-highlight": "^0.12.8",
    "@types/react-syntax-highlighter": "^15.5.13",
@@ -101,7 +101,7 @@
    "eslint": "^8.57.0",
    "eslint-config-airbnb": "^19.0.4",
    "eslint-config-airbnb-typescript": "^18.0.0",
-    "eslint-config-prettier": "^10.1.1",
+    "eslint-config-prettier": "^10.1.2",
    "eslint-plugin-import": "^2.29.1",
    "eslint-plugin-jsx-a11y": "^6.10.2",
    "eslint-plugin-prettier": "^5.2.6",
@@ -109,8 +109,8 @@
    "eslint-plugin-react-hooks": "^4.6.2",
    "eslint-plugin-unused-imports": "^4.1.4",
    "husky": "^9.1.7",
-    "jsdom": "^26.0.0",
-    "lint-staged": "^15.5.0",
+    "jsdom": "^26.1.0",
+    "lint-staged": "^15.5.1",
    "msw": "^2.6.6",
    "postcss": "^8.5.2",
    "prettier": "^3.5.3",
@@ -275,8 +275,8 @@ function isCommonDevelopmentString(str) {

  // HTML tags and attributes
  if (
-    /^<[a-z0-9]+>.*<\/[a-z0-9]+>$/.test(str) ||
-    /^<[a-z0-9]+ [^>]+\/>$/.test(str)
+    /^<[a-z0-9]+(?:\s[^>]*)?>.*<\/[a-z0-9]+>$/i.test(str) ||
+    /^<[a-z0-9]+ [^>]+\/>$/i.test(str)
  ) {
    return true;
  }
@@ -8,6 +8,8 @@ import {
  Conversation,
  ResultSet,
  GetTrajectoryResponse,
+  GitChangeDiff,
+  GitChange,
 } from "./open-hands.types";
 import { openHands } from "./open-hands-axios";
 import { ApiSettings, PostApiSettings } from "#/types/settings";
@@ -277,6 +279,26 @@ class OpenHands {
      appMode === "saas" ? "/api/logout" : "/api/unset-settings-tokens";
    await openHands.post(endpoint);
  }
+
+  /**
+   * Fetch the list of git changes for a conversation.
+   * @param conversationId ID of the conversation whose changes are requested
+   * @returns The `GitChange[]` body of the API response
+   */
+  static async getGitChanges(conversationId: string): Promise<GitChange[]> {
+    const url = `/api/conversations/${conversationId}/git/changes`;
+    const response = await openHands.get<GitChange[]>(url);
+    return response.data;
+  }
+
+  /**
+   * Fetch the diff of a single file for a conversation.
+   * @param conversationId ID of the conversation the file belongs to
+   * @param path File path, sent as the `path` query parameter
+   * @returns The `GitChangeDiff` body of the API response
+   */
+  static async getGitChangeDiff(
+    conversationId: string,
+    path: string,
+  ): Promise<GitChangeDiff> {
+    const url = `/api/conversations/${conversationId}/git/diff`;
+    const response = await openHands.get<GitChangeDiff>(url, {
+      params: { path },
+    });
+    return response.data;
+  }
 }

 export default OpenHands;
@@ -70,6 +70,8 @@ export interface AuthenticateResponse {
  error?: string;
 }

+// What started a conversation. NOTE(review): presumably "resolver" marks
+// conversations created by the resolver and "gui" those started from the web
+// UI — confirm against the backend.
+export type ConversationTrigger = "resolver" | "gui";
+
 export interface Conversation {
  conversation_id: string;
  title: string;
@@ -77,9 +79,22 @@ export interface Conversation {
  last_updated_at: string;
  created_at: string;
  status: ProjectStatus;
+  trigger?: ConversationTrigger;
 }

 export interface ResultSet<T> {
  results: T[];
  next_page_id: string | null;
 }
+
+// Single-letter status code of a changed file. The file diff viewer labels
+// "R" as "Renamed" and "U" as "Untracked"; "M", "A" and "D" presumably follow
+// git's modified / added / deleted codes — TODO confirm against the backend.
+export type GitChangeStatus = "M" | "A" | "D" | "R" | "U";
+
+// One entry of the list returned by the conversation "git/changes" endpoint.
+export interface GitChange {
+  // Status code of the change.
+  status: GitChangeStatus;
+  // Path of the changed file. NOTE(review): for renames the file diff viewer
+  // uses the last whitespace-separated token — confirm the exact format.
+  path: string;
+}
+
+// Body returned by the conversation "git/diff" endpoint for a single file.
+// NOTE(review): presumably the two sides shown by the file diff viewer —
+// confirm against the backend.
+export interface GitChangeDiff {
+  // Text for the "modified" side of the diff.
+  modified: string;
+  // Text for the "original" side of the diff.
+  original: string;
+}
@@ -0,0 +1,6 @@
+<svg width="22" height="22" viewBox="0 0 22 22" fill="none" xmlns="http://www.w3.org/2000/svg">
+  <path d="M11 21L16.5 8H5.5L11 21Z" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
+  <path d="M1 8L3.5 15.5L11 21L18.5 15.5L21 8" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
+  <path d="M1 8L5.5 8L8.25 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
+  <path d="M21 8L16.5 8L13.75 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
+</svg>
@@ -1,22 +0,0 @@
-import React from "react";
-
-function CmdLine() {
-  return (
-    <svg
-      xmlns="http://www.w3.org/2000/svg"
-      fill="none"
-      viewBox="0 0 24 24"
-      strokeWidth={1.5}
-      stroke="currentColor"
-      className="w-6 h-6"
-    >
-      <path
-        strokeLinecap="round"
-        strokeLinejoin="round"
-        d="m6.75 7.5 3 2.25-3 2.25m4.5 0h3m-9 8.25h13.5A2.25 2.25 0 0 0 21 18V6a2.25 2.25 0 0 0-2.25-2.25H5.25A2.25 2.25 0 0 0 3 6v12a2.25 2.25 0 0 0 2.25 2.25Z"
-      />
-    </svg>
-  );
-}
-
-export default CmdLine;
@@ -29,7 +29,7 @@ export function ChatInput({
  disabled,
  showButton = true,
  value,
-  maxRows = 4,
+  maxRows = 16,
  onSubmit,
  onStop,
  onChange,
@@ -53,6 +53,7 @@ export function ExpandableMessage({
  });

  useEffect(() => {
+    // If we have a translation ID, process it
    if (id && i18n.exists(id)) {
      let processedObservation = observation;
      let processedAction = action;
@@ -4,6 +4,9 @@ import { ChatMessage } from "#/components/features/chat/chat-message";
 import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
 import { ImageCarousel } from "../images/image-carousel";
 import { ExpandableMessage } from "./expandable-message";
+import { useUserConversation } from "#/hooks/query/use-user-conversation";
+import { useConversation } from "#/context/conversation-context";
+import { I18nKey } from "#/i18n/declaration";

 interface MessagesProps {
  messages: Message[];
@@ -11,13 +14,38 @@ interface MessagesProps {
 }

 export const Messages: React.FC<MessagesProps> = React.memo(
-  ({ messages, isAwaitingUserConfirmation }) =>
-    messages.map((message, index) => {
+  ({ messages, isAwaitingUserConfirmation }) => {
+    const { conversationId } = useConversation();
+    const { data: conversation } = useUserConversation(conversationId || null);
+
+    // Check if conversation metadata has trigger=resolver
+    const isResolverTrigger = conversation?.trigger === "resolver";
+
+    return messages.map((message, index) => {
      const shouldShowConfirmationButtons =
        messages.length - 1 === index &&
        message.sender === "assistant" &&
        isAwaitingUserConfirmation;

+      const isFirstUserMessageWithResolverTrigger =
+        index === 0 && message.sender === "user" && isResolverTrigger;
+
+      // Special case: First user message with resolver trigger
+      if (isFirstUserMessageWithResolverTrigger) {
+        return (
+          <div key={index}>
+            <ExpandableMessage
+              type="action"
+              message={message.content}
+              id={I18nKey.CHAT$RESOLVER_INSTRUCTIONS}
+            />
+            {message.imageUrls && message.imageUrls.length > 0 && (
+              <ImageCarousel size="small" images={message.imageUrls} />
+            )}
+          </div>
+        );
+      }
+
      if (message.type === "error" || message.type === "action") {
        return (
          <div key={index}>
@@ -46,7 +74,8 @@ export const Messages: React.FC<MessagesProps> = React.memo(
          {shouldShowConfirmationButtons && <ConfirmationButtons />}
        </ChatMessage>
      );
-    }),
+    });
+  },
 );

 Messages.displayName = "Messages";
@@ -1,20 +0,0 @@
-import { useTranslation } from "react-i18next";
-import { I18nKey } from "#/i18n/declaration";
-
-interface NewConversationButtonProps {
-  onClick: () => void;
-}
-
-export function NewConversationButton({ onClick }: NewConversationButtonProps) {
-  const { t } = useTranslation();
-  return (
-    <button
-      data-testid="new-conversation-button"
-      type="button"
-      onClick={onClick}
-      className="font-bold bg-[#4465DB] px-2 py-1 rounded"
-    >
-      + {t(I18nKey.PROJECT$NEW)}
-    </button>
-  );
-}
@@ -0,0 +1,172 @@
+import { DiffEditor } from "@monaco-editor/react";
+import React from "react";
+import { editor as editor_t } from "monaco-editor";
+import { LuFileDiff, LuFileMinus, LuFilePlus } from "react-icons/lu";
+import { IconType } from "react-icons/lib";
+import { GitChangeStatus } from "#/api/open-hands.types";
+import { getLanguageFromPath } from "#/utils/get-language-from-path";
+import { cn } from "#/utils/utils";
+import ChevronUp from "#/icons/chveron-up.svg?react";
+import { useGitDiff } from "#/hooks/query/use-get-diff";
+
+interface LoadingSpinnerProps {
+  // Extra classes merged onto the spinning element (e.g. its size).
+  className?: string;
+}
+
+// TODO: Move out of this file and replace the current spinner with this one
+/**
+ * Centered circular spinner exposed to assistive tech as a "Loading" status.
+ */
+function LoadingSpinner({ className }: LoadingSpinnerProps) {
+  const spinnerClasses = cn(
+    "animate-spin rounded-full border-4 border-gray-200 border-t-blue-500",
+    className,
+  );
+
+  return (
+    <div className="flex items-center justify-center">
+      <div className={spinnerClasses} role="status" aria-label="Loading" />
+    </div>
+  );
+}
+
+// Header indicator for each git status: either a react-icons component or a
+// plain text label. FileDiffViewer renders string entries inside a <span> and
+// component entries as an icon.
+const STATUS_MAP: Record<GitChangeStatus, string | IconType> = {
+  A: LuFilePlus,
+  D: LuFileMinus,
+  M: LuFileDiff,
+  R: "Renamed",
+  U: "Untracked",
+};
+
+export interface FileDiffViewerProps {
+  // Path reported for the change. For renames ("R") the last
+  // whitespace-separated token is used as the file path.
+  path: string;
+  // Git status code of the change; selects the header icon and diff layout.
+  type: GitChangeStatus;
+}
+
+/**
+ * Collapsible row for one changed file.
+ *
+ * The row starts collapsed; the diff query is only enabled while the row is
+ * expanded. Once the query succeeds, a read-only Monaco diff editor is shown
+ * below the header and its container height follows the editor content.
+ *
+ * @param path Path reported for the change (see FileDiffViewerProps)
+ * @param type Git status code of the change
+ */
+export function FileDiffViewer({ path, type }: FileDiffViewerProps) {
+  const [isCollapsed, setIsCollapsed] = React.useState(true);
+  const [editorHeight, setEditorHeight] = React.useState(400);
+  const diffEditorRef = React.useRef<editor_t.IStandaloneDiffEditor>(null);
+
+  // Untracked files are presented like added files.
+  const isAdded = type === "A" || type === "U";
+  const isDeleted = type === "D";
+
+  const filePath = React.useMemo(() => {
+    if (type === "R") {
+      // Renames carry several whitespace-separated tokens; the last one is
+      // the file path. Fall back to the raw path when there is only a single
+      // token so the path never becomes undefined.
+      const parts = path.split(/\s+/);
+      return parts.length > 1 ? parts[parts.length - 1] : path;
+    }
+
+    return path;
+  }, [path, type]);
+
+  const {
+    data: diff,
+    isLoading,
+    isSuccess,
+    isRefetching,
+  } = useGitDiff({
+    filePath,
+    type,
+    enabled: !isCollapsed,
+  });
+
+  // Function to update editor height based on content
+  const updateEditorHeight = React.useCallback(() => {
+    if (diffEditorRef.current) {
+      const originalEditor = diffEditorRef.current.getOriginalEditor();
+      const modifiedEditor = diffEditorRef.current.getModifiedEditor();
+
+      if (originalEditor && modifiedEditor) {
+        // Get the content height from both editors and use the larger one
+        const originalHeight = originalEditor.getContentHeight();
+        const modifiedHeight = modifiedEditor.getContentHeight();
+        const contentHeight = Math.max(originalHeight, modifiedHeight);
+
+        // Add a small buffer to avoid scrollbar
+        setEditorHeight(contentHeight + 20);
+      }
+    }
+  }, []);
+
+  // Keep a handle on the mounted editor and resize the container whenever
+  // either side's content size changes.
+  const handleEditorDidMount = (editor: editor_t.IStandaloneDiffEditor) => {
+    diffEditorRef.current = editor;
+    updateEditorHeight();
+
+    const originalEditor = editor.getOriginalEditor();
+    const modifiedEditor = editor.getModifiedEditor();
+
+    originalEditor.onDidContentSizeChange(updateEditorHeight);
+    modifiedEditor.onDidContentSizeChange(updateEditorHeight);
+  };
+
+  // Untracked files reuse the "added" indicator instead of the text label.
+  const status = type === "U" ? STATUS_MAP.A : STATUS_MAP[type];
+
+  let statusIcon: React.ReactNode;
+  if (typeof status === "string") {
+    statusIcon = <span>{status}</span>;
+  } else {
+    const StatusIcon = status; // now it's recognized as a component
+    statusIcon = <StatusIcon className="w-5 h-5" />;
+  }
+
+  const isFetchingData = isLoading || isRefetching;
+
+  return (
+    <div data-testid="file-diff-viewer-outer" className="w-full flex flex-col">
+      <div
+        className={cn(
+          "flex justify-between items-center px-2.5 py-3.5 border border-neutral-600 rounded-xl hover:cursor-pointer",
+          // Open the bottom edge only when the diff panel below is actually
+          // rendered (same condition as the panel), so a failed fetch does
+          // not leave the header without its bottom border.
+          !isCollapsed && isSuccess && "border-b-0 rounded-b-none",
+        )}
+        onClick={() => setIsCollapsed((prev) => !prev)}
+      >
+        <span className="text-sm w-full text-content flex items-center gap-2">
+          {isFetchingData && <LoadingSpinner className="w-5 h-5" />}
+          {!isFetchingData && statusIcon}
+          <strong className="w-full truncate">{filePath}</strong>
+          <button data-testid="collapse" type="button">
+            <ChevronUp
+              className={cn(
+                "w-4 h-4 transition-transform",
+                isCollapsed && "transform rotate-180",
+              )}
+            />
+          </button>
+        </span>
+      </div>
+      {isSuccess && !isCollapsed && (
+        <div
+          className="w-full border border-neutral-600 overflow-hidden"
+          style={{ height: `${editorHeight}px` }}
+        >
+          <DiffEditor
+            data-testid="file-diff-viewer"
+            className="w-full h-full"
+            language={getLanguageFromPath(filePath)}
+            original={isAdded ? "" : diff.original}
+            modified={isDeleted ? "" : diff.modified}
+            theme="vs-dark"
+            onMount={handleEditorDidMount}
+            options={{
+              renderValidationDecorations: "off",
+              readOnly: true,
+              renderSideBySide: !isAdded && !isDeleted,
+              scrollBeyondLastLine: false,
+              minimap: {
+                enabled: false,
+              },
+              hideUnchangedRegions: {
+                enabled: true,
+              },
+              automaticLayout: true,
+              scrollbar: {
+                // Make scrollbar less intrusive
+                alwaysConsumeMouseWheel: false,
+              },
+            }}
+          />
+        </div>
+      )}
+    </div>
+  );
+}
@@ -23,8 +23,8 @@ export function PaymentForm() {
    if (amount?.trim()) {
      if (!amountIsValid(amount)) return;

-      const float = parseFloat(amount);
-      addBalance({ amount: Number(float.toFixed(2)) });
+      const intValue = parseInt(amount, 10);
+      addBalance({ amount: intValue });
    }

    setButtonIsDisabled(true);
@@ -65,10 +65,13 @@ export function PaymentForm() {
          testId="top-up-input"
          name="top-up-input"
          onChange={handleTopUpInputChange}
-          type="text"
+          type="number"
          label={t(I18nKey.PAYMENT$ADD_FUNDS)}
          placeholder="Specify an amount in USD to add - min $10"
          className="w-[680px]"
+          min={10}
+          max={25000}
+          step={1}
        />

        <div className="flex items-center w-[680px] gap-2">
@@ -13,6 +13,9 @@ interface SettingsInputProps {
  startContent?: React.ReactNode;
  className?: string;
  onChange?: (value: string) => void;
+  min?: number;
+  max?: number;
+  step?: number;
 }

 export function SettingsInput({
@@ -27,6 +30,9 @@ export function SettingsInput({
  startContent,
  className,
  onChange,
+  min,
+  max,
+  step,
 }: SettingsInputProps) {
  return (
    <label className={cn("flex flex-col gap-2.5 w-fit", className)}>
@@ -43,6 +49,9 @@ export function SettingsInput({
        type={type}
        defaultValue={defaultValue}
        placeholder={placeholder}
+        min={min}
+        max={max}
+        step={step}
        className={cn(
          "bg-tertiary border border-[#717888] h-10 w-full rounded p-2 placeholder:italic placeholder:text-tertiary-alt",
          "disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
@@ -1,26 +0,0 @@
-import { useSelector } from "react-redux";
-import { useTranslation } from "react-i18next";
-import { cn } from "#/utils/utils";
-import { AgentState } from "#/types/agent-state";
-import { RootState } from "#/store";
-import { I18nKey } from "#/i18n/declaration";
-
-export function TerminalStatusLabel() {
-  const { t } = useTranslation();
-  const { curAgentState } = useSelector((state: RootState) => state.agent);
-
-  return (
-    <div className="flex items-center gap-2">
-      <div
-        className={cn(
-          "w-2 h-2 rounded-full",
-          curAgentState === AgentState.LOADING ||
-            curAgentState === AgentState.STOPPED
-            ? "bg-red-500 animate-pulse"
-            : "bg-green-500",
-        )}
-      />
-      {t(I18nKey.WORKSPACE$TERMINAL_TAB_LABEL)}
-    </div>
-  );
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
rohitvinodmalhotra@gmail.com	63edbaca2f	add callback logs	2025-04-21 15:19:34 -04:00
rohitvinodmalhotra@gmail.com	3cc689d557	Revert "Merge only openhands/events/stream.py from add-event-stream-diagnostics branch" This reverts commit `fb9162ac6b`.	2025-04-21 15:10:43 -04:00
openhands	44cc2a463b	Merge remote-tracking branch 'origin/abstract-resolver'	2025-04-21 18:52:49 +00:00
openhands	fb9162ac6b	Merge only openhands/events/stream.py from add-event-stream-diagnostics branch	2025-04-21 18:52:34 +00:00
rohitvinodmalhotra@gmail.com	83e497cfa5	Revert "Merge add-event-stream-diagnostics into abstract-resolver" This reverts commit `395d324696`.	2025-04-21 14:52:19 -04:00
openhands	395d324696	Merge add-event-stream-diagnostics into abstract-resolver	2025-04-21 18:49:22 +00:00
rohitvinodmalhotra@gmail.com	1abc67f9e9	rm unsubscription	2025-04-19 22:03:16 -04:00
rohitvinodmalhotra@gmail.com	b883820d07	move subscription to method	2025-04-19 21:32:54 -04:00
openhands	71950c9169	Fix resolver tests to work with new constructor signature	2025-04-20 01:07:37 +00:00
rohitvinodmalhotra@gmail.com	aa60bb5626	move arg processing to cls	2025-04-19 21:00:03 -04:00
Rohit Malhotra	c145b1531e	Merge branch 'main' into abstract-resolver	2025-04-18 22:48:51 -04:00
rohitvinodmalhotra@gmail.com	75d0f9199b	fix lint	2025-04-18 22:18:24 -04:00
openhands	b3f155d957	Fix dictionary changed size during iteration error in IssueResolver	2025-04-19 02:10:16 +00:00
openhands	71799fa7fb	Fix unit tests for class-based resolver implementation	2025-04-18 23:27:19 +00:00
openhands	237810241a	Update resolver unit tests to work with class-based implementation	2025-04-18 23:15:53 +00:00
rohitvinodmalhotra@gmail.com	4a84e2c01d	Revert "Update tests to work with refactored IssueResolver class" This reverts commit `b439ef39ae`.	2025-04-18 18:46:27 -04:00
rohitvinodmalhotra@gmail.com	9c92b5d828	Revert "Update dependencies and GitLab test" This reverts commit `d175ecb2a8`.	2025-04-18 18:46:13 -04:00
openhands	d175ecb2a8	Update dependencies and GitLab test	2025-04-18 22:43:16 +00:00
openhands	b439ef39ae	Update tests to work with refactored IssueResolver class	2025-04-18 22:43:16 +00:00
rohitvinodmalhotra@gmail.com	4c0a3f262e	fix lint	2025-04-18 18:28:11 -04:00
Xingyao Wang	b5d7e428d1	Fix #7916 : Update Benchmark Score link in README (#7943 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-19 00:27:54 +02:00
Rohit Malhotra	d65ea313e8	Add external auth id to `/repos` route (#7946 )	2025-04-18 18:19:36 -04:00
openhands	25ae901990	Update AllIssueResolver class to use proper OOP design	2025-04-18 22:16:02 +00:00
openhands	aa6600d104	Fix remaining GITLAB_CI references to use class constant	2025-04-18 22:14:22 +00:00
rohitvinodmalhotra@gmail.com	0eeac0990e	fix defualt vals	2025-04-18 18:13:00 -04:00
openhands	68b886c5f5	Update IssueResolver class to use proper OOP design	2025-04-18 22:04:57 +00:00
openhands	8448a9562d	Add constructor to IssueResolver class and use instance variables	2025-04-18 21:56:43 +00:00
rohitvinodmalhotra@gmail.com	ddc7424181	Merge branch 'main' into abstract-resolver	2025-04-18 17:51:24 -04:00
Rohit Malhotra	a09ecadba6	(Chore): Rm deprecate `/installations` route (#7945 )	2025-04-18 21:51:08 +00:00
Rohit Malhotra	358166feb2	[Logging]: Add warning logs for gitlab api (#7941 )	2025-04-18 21:31:36 +00:00
Kenny Dizi	85e2b73eb4	[Feat] Support o3 and o4 mini (#7898 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-04-18 21:23:19 +00:00
Rohit Malhotra	c18475ddc2	Add public vs private info for repo list (#7937 )	2025-04-18 17:07:08 -04:00
Xingyao Wang	06fcf54475	chore: track condeser metadata for llm completion (#7938 )	2025-04-19 05:05:31 +08:00
Rohit Malhotra	f751f8ab37	[Feat]: Add graphql fetching for GitLab service cls (#7839 )	2025-04-18 16:53:49 -04:00
Tei1988	523c6d03c1	fix(resolver): Allow using github.token in Actions and fix base_domain handling (#7934 )	2025-04-18 20:50:59 +00:00
Rohit Malhotra	0e0f043e59	[Feat]: Improve resolver inline pr comment localization (#7932 )	2025-04-18 15:56:34 -04:00
Xingyao Wang	91c691d526	[agent] Read-only Agent (#6947 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-19 02:46:24 +08:00
Xingyao Wang	c6092291ce	chore: make sure Makefile `make lint` also lint tests (#7933 )	2025-04-18 14:01:36 -04:00
Engel Nyst	a2c55cfdef	Refactor to clean up and move utility/legacy out of the agent (#7917 )	2025-04-19 01:53:33 +08:00
Michael Panchenko	76cad626ed	Bugfix: make extraction of poetry_venvs_path more robust (#7920 )	2025-04-18 17:33:52 +00:00
Xingyao Wang	7c23993344	fix(eval): typo in SWE_Bench evaluation (#7930 )	2025-04-19 00:31:08 +08:00
dependabot[bot]	b669715416	chore(deps): bump the version-all group across 1 directory with 20 updates (#7925 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: amanape <83104063+amanape@users.noreply.github.com>	2025-04-18 16:08:35 +00:00
tofarr	7292122b72	Refactor agent loop initialization for better extensibility (#7926 )	2025-04-18 09:44:34 -06:00
dependabot[bot]	992ae15c78	chore(deps): bump the version-all group with 7 updates (#7927 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-18 15:02:41 +00:00
Carlos Freund	f2b4772ac2	fix(runtime) Avoid windows reserved port (#7722 ) Co-authored-by: Carlos Freund <carlosfreund@gmail.com>	2025-04-18 22:18:46 +08:00
Engel Nyst	9b9b1291fc	[chore] Just linting on swe-bench files (#7918 )	2025-04-18 22:12:01 +08:00
rohitvinodmalhotra@gmail.com	0e2d9dce88	fix lint	2025-04-17 19:10:34 -04:00
Xingyao Wang	6171395ef9	Refactor metrics handling to include condenser metrics (#7907 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 23:08:35 +00:00
sp.wack	d270476d6c	hotfix(backend): Exclude open PRs when fetching suggested tasks (#7912 )	2025-04-18 03:00:04 +04:00
Xingyao Wang	f1f7dca009	refactor(action_execution_client): rename function and add property (#7913 )	2025-04-18 06:59:13 +08:00
rohitvinodmalhotra@gmail.com	255edbbfd7	fix async mark	2025-04-17 18:40:10 -04:00
rohitvinodmalhotra@gmail.com	cbf0f541a8	convert to classes	2025-04-17 18:36:21 -04:00
mamoodi	45f572f268	Update documentation (#7905 )	2025-04-17 17:43:18 -04:00
Niels Mündler	4b124d5906	Add inference for SWT-Bench (#7201 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Calvin Smith <email@cjsmith.io>	2025-04-17 14:49:42 -06:00
Robert Brennan	988d4aa679	Improved logging for agent controller, including pending action time (#7897 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 20:22:38 +00:00
Robert Brennan	b452fe273c	Restore terminal interactivity while keeping UI changes (#7903 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 19:42:03 +00:00
sp.wack	9544b37c8a	hotfix(forntend): Wait for runtime to start before fetching git changes for diff view (#7910 )	2025-04-17 15:15:03 -04:00
Rohit Malhotra	0491357fef	[Refactor]: Collapse initial user message for cloud resolver (#7871 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 15:09:28 -04:00
Robert Brennan	fedd517a71	Change add funds input to number type that only accepts integers (#7628 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 18:56:23 +00:00
Rohit Malhotra	9cbed8802f	Update translation from GitHub Settings to Git Settings (#7908 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 18:55:28 +00:00
Rohit Malhotra	c2e1babd76	Fix failing unit test on main (#7909 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-17 18:43:09 +00:00
Carlos Freund	cc8b677f3e	fix(metrics) Merge metrics of agent LLM and condenser LLM (#7890 ) Co-authored-by: Carlos Freund <carlosfreund@gmail.com>	2025-04-18 01:15:14 +08:00
chuckbutkus	78e3f82de1	Change client name and add IDP hint (#7787 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Ray Myers <ray.myers@gmail.com> Co-authored-by: Calvin Smith <email@cjsmith.io> Co-authored-by: Calvin Smith <calvin@all-hands.dev> Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com> Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Graham Neubig <neubig@gmail.com> Co-authored-by: Panduka Muditha <pandukal@verdentra.com> Co-authored-by: Bashwara Undupitiya <bashwarau@verdentra.com> Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>	2025-04-17 12:23:12 -04:00
dependabot[bot]	20ca2cd8b9	chore(deps): bump the version-all group across 1 directory with 9 updates (#7902 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-17 17:45:44 +02:00
mamoodi	ea0fcd6002	Change title of button to start new conversation (#7464 )	2025-04-17 11:16:33 -04:00
juanmichelini	6bcebd4b9d	Jetbrains CI Benchmark (#7811 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-04-17 15:10:20 +00:00
Xingyao Wang	93e9db3206	Refactor system message handling to use event stream (#7824 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Calvin Smith <email@cjsmith.io>	2025-04-17 22:30:19 +08:00
Ray Myers	caf34d83bd	chore - remove dummy agent test (#7848 )	2025-04-17 08:52:02 -04:00
LoneRifle	49d3cd0863	fix(check-unlocalized-strings): make HTML tag test case-insens (#7892 )	2025-04-17 14:26:49 +02:00
sp.wack	34989f8e96	feat: Diff UI (#6934 )	2025-04-17 16:12:25 +04:00
Rohit Malhotra	9274664302	[Fix]: Rm unnecessary provider token serializer (#7889 )	2025-04-16 21:41:07 +00:00
mamoodi	437f0a0154	Release 0.33.0 (#7882 ) Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: tofarr <tofarr@gmail.com> Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Bashwara Undupitiya <65051545+bashwara@users.noreply.github.com>	2025-04-16 17:03:00 -04:00
Bashwara Undupitiya	9d79bf5fff	fix: Update folder security dialog styling (#7886 )	2025-04-16 14:33:00 -04:00
Robert Brennan	4c62b1d428	Fix: Ensure consistent tab height when workspace tab is selected (#7885 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-16 17:54:50 +00:00
Rohit Malhotra	b2a4b4ed90	[Refactor]: Modularize settings storage logic (#7868 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-16 13:30:13 -04:00
tofarr	9262babc3b	Fix for error on close (#7884 )	2025-04-16 11:13:58 -06:00
dependabot[bot]	1c80ded753	chore(deps-dev): bump the eslint group across 1 directory with 2 updates (#7790 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-16 19:41:25 +04:00
Xingyao Wang	4de8c4d6b1	Update repo microagent docs with frontend action handling information (#7856 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-16 14:27:31 +00:00
Xingyao Wang	91f2254039	frontend: fix terminal prompt and command styling (#7872 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-16 22:13:15 +08:00
Calvin Smith	66fd156c65	feat: Combining condensers (#7867 ) Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2025-04-16 07:09:13 -06:00
Xingyao Wang	4ec16f3c2e	microagent: Update github.md to avoid agent marking PR as ready for review (#7873 )	2025-04-15 23:56:41 -04:00
Robert Brennan	628003abef	Convert terminal to tab, make terminal read only (#7795 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-04-16 11:22:52 +08:00
Xingyao Wang	07e400b73d	refactor(mcp): simplify MCP config & fix timeout (#7820 ) Co-authored-by: ducphamle2 <ducphamle212@gmail.com> Co-authored-by: trungbach <trunga2k29@gmail.com> Co-authored-by: quangdz1704 <Ntq.1704@gmail.com> Co-authored-by: Duc Pham <44611780+ducphamle2@users.noreply.github.com> Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-16 11:04:21 +08:00
Tom Deckers	7e14a512e0	Add base_domain parameter for GitHub Enterprise support (#7754 ) Co-authored-by: Tom Deckers <tdeckers@cisco.com> Co-authored-by: Robert Brennan <accounts@rbren.io> Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>	2025-04-16 00:00:32 +00:00
Rohit Malhotra	d7e8f843ad	[Docs]: Add GitLab token setup documentation (#7635 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-15 17:25:25 -04:00
Xingyao Wang	e69ae81ad2	Add GPT-4.1 to function calling list (#7866 ) Co-authored-by: Juan Michelini <juan@juan.com.uy> Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-15 18:12:15 +02:00
dependabot[bot]	03c5db32e6	chore(deps): bump the version-all group with 8 updates (#7865 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-15 15:40:04 +00:00
Engel Nyst	5e5bf23f9c	[Evaluation] Fix KeyError when the instance failed prematurely (#7864 )	2025-04-15 15:19:31 +00:00
Shotaro Sano	e0fcd7a61e	Fix issue #6098 : Prevent duplicate error message display in chat interface (#7858 )	2025-04-15 16:21:23 +04:00
Ryan H. Tran	e9989d1085	Upgrade `openhands-aci` to 0.2.10 (#7810 )	2025-04-15 18:43:44 +07:00
Xingyao Wang	49c515b252	frontend: Display think action as action rather than text (#7852 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-15 09:09:16 +08:00
Robert Brennan	2d05578c21	Fix links in readme (#7854 )	2025-04-15 02:27:25 +04:00
Engel Nyst	d05a6f30e1	[Refactor] Rename codeact_* agent options to simple name (#7853 )	2025-04-15 00:14:13 +02:00
Calvin Smith	10c81c39fb	Fix export conversation button in Safari (#7662 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Calvin Smith <calvin@all-hands.dev>	2025-04-14 15:10:20 -06:00
sumeetkumar1701	2d599349ef	fix:Transmitting accurate head parameter in cross-repository pull requests. (#7788 )	2025-04-14 17:57:15 +00:00
mamoodi	33caf5c6ca	Update feature template to add note about adding reaction (#7847 )	2025-04-14 13:56:04 -04:00
Ciocanel Razvan	a9850766a7	Allow input for pr_type openhands-resolver.yml (#7619 ) Co-authored-by: Rohit Malhotra <rohitvinodmalhotra@gmail.com>	2025-04-14 17:53:58 +00:00
OpenHands	77e2416def	Fix issue #7826 : [Bug]: Chat input box is too small (#7827 )	2025-04-14 12:19:38 -05:00
蔡政特	02af9865ec	fix: Runtime local docker environment HTTPStatusError (#7648 ) Co-authored-by: Robert Brennan <accounts@rbren.io>	2025-04-14 15:41:54 +00:00
dependabot[bot]	75ca2aa6b1	chore(deps): bump the version-all group with 10 updates (#7846 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-14 17:16:27 +02:00
mamoodi	d820661592	Update docs on why we use sandbox user (#7845 )	2025-04-14 11:01:35 -04:00
Robert Brennan	1ff351a4f1	Add OpenHands Cloud to README, other minor tweaks (#7844 )	2025-04-14 14:01:52 +00:00
OpenHands	78b8e58561	Fix issue #7837 : [Bug]: Unit tests for tool use support (#7838 ) Co-authored-by: Engel Nyst <engel.nyst@gmail.com>	2025-04-14 15:45:37 +02:00
tofarr	fddbfce51a	Fix for race condition in cache (#7812 )	2025-04-12 07:43:34 -06:00
Rohit Malhotra	20d3766451	[Fix]: Use better auth header for GitLab microagent (#7828 )	2025-04-11 20:09:28 -04:00
sp.wack	72b5e18898	fix(backend): Return 400 if trying to open a binary file (#7825 )	2025-04-11 22:47:57 +00:00
Rohit Malhotra	03b8b8c19a	(Chore): Rm single provider legacy code (#7821 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-11 18:21:35 +00:00
Panduka Muditha	7c2f1b075e	feat: CLI enhancements to support /init, /help and /exit (#7801 ) Co-authored-by: Bashwara Undupitiya <bashwarau@verdentra.com>	2025-04-11 14:13:41 -04:00
Graham Neubig	883da1b28c	Add extensive typing to openhands/runtime/plugins directory (#7726 ) Co-authored-by: openhands <openhands@all-hands.dev>	2025-04-12 02:02:53 +08:00
Engel Nyst	bb98d94b35	[evaluation] fix missing metadata (#7819 )	2025-04-11 16:58:59 +00:00
sp.wack	d114c45135	chore: Improve pre-commit (#7818 )	2025-04-11 20:55:26 +04:00
Calvin Smith	36e092e0ac	fix: Disable prompt caching in default condenser (#7781 ) Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Calvin Smith <calvin@all-hands.dev> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>	2025-04-11 10:09:23 -06:00
Ray Myers	e2bb69908a	chore - Rebuild docker image in fork CI instead of using artifacts (#7809 )	2025-04-11 11:06:46 -05:00
Ray Myers	cd33c5eac7	Revert "chore - User docker cache mount for vscode server archive (#7… (#7817 )	2025-04-11 16:04:50 +00:00
dependabot[bot]	0f8a139fb5	chore(deps): bump the version-all group with 5 updates (#7814 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2025-04-11 17:06:59 +02:00