Add evaluation changes without disabling repository memory

2026-04-29 03:00:45 -04:00 · 2025-04-15 15:06:58 +00:00
337 changed files with 5045 additions and 18345 deletions
--- a/.github/workflows/dummy-agent-test.yml
+++ b/.github/workflows/dummy-agent-test.yml
@@ -0,0 +1,53 @@
+# Workflow that uses the DummyAgent to run a simple task
+name: Run E2E test with dummy agent
+
+# Always run on "main"
+# Always run on PRs
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+
+# Runs triggered by the same PR share one concurrency group, so a newer push cancels the in-progress run; each commit on main gets its own unique group (keyed by run id) and is never cancelled
+concurrency:
+  group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    runs-on: blacksmith-4vcpu-ubuntu-2204
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Install tmux
+        run: sudo apt-get update && sudo apt-get install -y tmux
+      - name: Setup Node.js
+        uses: useblacksmith/setup-node@v5
+        with:
+          node-version: '22.x'
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Set up Python
+        uses: useblacksmith/setup-python@v6
+        with:
+          python-version: '3.12'
+          cache: 'poetry'
+      - name: Install Python dependencies using Poetry
+        run: poetry install --without evaluation
+      - name: Build Environment
+        run: make build
+      - name: Run tests
+        # Every `run` step executes in its own shell process, so `$?` read in a
+        # separate step is always 0 and a failed step already stops the job.
+        # The pass/fail report therefore lives in the step that runs the agent.
+        run: |
+          if SANDBOX_FORCE_REBUILD_RUNTIME=True poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent; then
+            echo "Test passed"
+          else
+            status=$?
+            echo "Test failed (exit code ${status})"
+            exit "${status}"
+          fi
--- a/.github/workflows/openhands-resolver.yml
+++ b/.github/workflows/openhands-resolver.yml
@@ -179,7 +179,7 @@ jobs:

          echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
          echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.PAT_TOKEN || github.token }}" >> $GITHUB_ENV
-          echo "SANDBOX_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
+          echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV

          # Set branch variables
          echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV
--- a/.openhands/microagents/repo.md
+++ b/.openhands/microagents/repo.md
@@ -54,20 +54,3 @@ Frontend:
 ## Template for Github Pull Request

 If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
-
-## Implementation Details
-
-These details may or may not be useful for your current task.
-
-### Frontend
-
-#### Action Handling:
- Actions are defined in `frontend/src/types/action-type.ts`
- The `HANDLED_ACTIONS` array in `frontend/src/state/chat-slice.ts` determines which actions are displayed as collapsible UI elements
- To add a new action type to the UI:
-  1. Add the action type to the `HANDLED_ACTIONS` array
-  2. Implement the action handling in `addAssistantAction` function in chat-slice.ts
-  3. Add a translation key in the format `ACTION_MESSAGE$ACTION_NAME` to the i18n files
- Actions with `thought` property are displayed in the UI based on their action type:
-  - Regular actions (like "run", "edit") display the thought as a separate message
-  - Special actions (like "think") are displayed as collapsible elements only
--- a/Development.md
+++ b/Development.md
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
 To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
 setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.

-Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
+Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.32-nikolaik`

 ## Develop inside Docker container

--- a/15
+++ b/15
@@ -39,7 +39,6 @@ ifeq ($(INSTALL_DOCKER),)
 	@$(MAKE) -s check-docker
 endif
 	@$(MAKE) -s check-poetry
-	@$(MAKE) -s check-tmux
 	@echo "$(GREEN)Dependencies checked successfully.$(RESET)"

 check-system:
@@ -102,18 +101,6 @@ check-docker:
 		exit 1; \
 	fi

-check-tmux:
-	@echo "$(YELLOW)Checking tmux installation...$(RESET)"
-	@if command -v tmux > /dev/null; then \
-		echo "$(BLUE)$(shell tmux -V) is already installed.$(RESET)"; \
-	else \
-		echo "$(YELLOW)╔════════════════════════════════════════════════════════════════════════════╗$(RESET)"; \
-		echo "$(YELLOW)║ OPTIONAL: tmux is not installed.                                          ║$(RESET)"; \
-		echo "$(YELLOW)║ Some advanced terminal features may not work without tmux.                ║$(RESET)"; \
-		echo "$(YELLOW)║ You can install it if needed, but it's not required for development.      ║$(RESET)"; \
-		echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
-	fi
-
 check-poetry:
 	@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
 	@if command -v poetry > /dev/null; then \
@@ -188,7 +175,7 @@ install-pre-commit-hooks:

 lint-backend:
 	@echo "$(YELLOW)Running linters...$(RESET)"
-	@poetry run pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
+	@poetry run pre-commit run --files openhands/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)

 lint-frontend:
 	@echo "$(YELLOW)Running linters for frontend...$(RESET)"
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
  <br/>
  <a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
  <a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
-  <a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
+  <a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
  <hr>
 </div>

@@ -52,17 +52,17 @@ system requirements and more information.


 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```

 You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
--- a/config.template.toml
+++ b/config.template.toml
@@ -221,22 +221,9 @@ enable_browsing = true
 # Whether the LLM draft editor is enabled
 enable_llm_editor = false

-# Whether the standard editor tool (str_replace_editor) is enabled
-# Only has an effect if enable_llm_editor is False
-enable_editor = true
-
 # Whether the IPython tool is enabled
 enable_jupyter = true

-# Whether the command tool is enabled
-enable_cmd = true
-
-# Whether the think tool is enabled
-enable_think = true
-
-# Whether the finish tool is enabled
-enable_finish = true
-
 # LLM config group to use
 #llm_config = 'your-llm-config-group'

--- a/containers/dev/Dockerfile
+++ b/containers/dev/Dockerfile
@@ -61,8 +61,8 @@ RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get install -y python3.12 python3.12-venv python3.12-dev python3-pip \
    && ln -s /usr/bin/python3.12 /usr/bin/python

-# NodeJS >= 22.x
-RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
+# NodeJS >= 18.17.1
+RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
    && apt-get install -y nodejs

 # Poetry >= 1.8
@@ -108,7 +108,7 @@ WORKDIR /app

 # cache build dependencies
 RUN \
-  --mount=type=bind,source=./,target=/app/,rw \
+  --mount=type=bind,source=./,target=/app/ \
  <<EOF
 #!/bin/bash
 make -s clean
--- a/containers/dev/compose.yml
+++ b/containers/dev/compose.yml
@@ -11,7 +11,7 @@ services:
      - BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
      - SANDBOX_API_HOSTNAME=host.docker.internal
      #
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.32-nikolaik}
      - SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,7 +7,7 @@ services:
    image: openhands:latest
    container_name: openhands-app-${DATE:-}
    environment:
-      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
+      - SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik}
      #- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
      - WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
    ports:
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -3,7 +3,6 @@

 # Production
 /build
-/static/swagger-ui

 # Generated files
 .docusaurus
--- a/docs/docusaurus.config.ts
+++ b/docs/docusaurus.config.ts
@@ -36,7 +36,6 @@ const config: Config = {
    mermaid: true,
  },
  themes: ['@docusaurus/theme-mermaid'],
-  plugins: [],
  presets: [
    [
      'classic',
@@ -76,11 +75,6 @@ const config: Config = {
          position: 'left',
          label: 'User Guides',
        },
-        {
-          href: 'https://docs.all-hands.dev/swagger-ui/', // FIXME: this should be a relative path, but docusarus steals the click
-          label: 'API',
-          position: 'left',
-        },
        {
          type: 'localeDropdown',
          position: 'left',
--- a/docs/generate-swagger-ui.js
+++ b/docs/generate-swagger-ui.js
@@ -1,102 +0,0 @@
-const fs = require('fs');
-const path = require('path');
-const swaggerUiDist = require('swagger-ui-dist');
-
-/**
- * This script manually sets up Swagger UI for the Docusaurus documentation.
- * 
- * Why we need this approach:
- * 1. Docusaurus doesn't have a built-in way to integrate Swagger UI
- * 2. We need to copy the necessary files from swagger-ui-dist to our static directory
- * 3. We need to create a custom index.html file that points to our OpenAPI spec
- * 4. This approach allows us to customize the Swagger UI to match our documentation style
- */
-
-// Get the absolute path to the swagger-ui-dist package
-const swaggerUiDistPath = swaggerUiDist.getAbsoluteFSPath();
-
-// Create the target directory if it doesn't exist
-const targetDir = path.join(__dirname, 'static', 'swagger-ui');
-if (!fs.existsSync(targetDir)) {
-  fs.mkdirSync(targetDir, { recursive: true });
-}
-
-// Copy all files from swagger-ui-dist to our target directory
-const files = fs.readdirSync(swaggerUiDistPath);
-files.forEach(file => {
-  const sourcePath = path.join(swaggerUiDistPath, file);
-  const targetPath = path.join(targetDir, file);
-  
-  // Skip directories and non-essential files
-  if (fs.statSync(sourcePath).isDirectory() || 
-      file === 'package.json' || 
-      file === 'README.md' ||
-      file.endsWith('.map')) {
-    return;
-  }
-  
-  fs.copyFileSync(sourcePath, targetPath);
-});
-
-// Create a custom index.html file that points to our OpenAPI spec
-const indexHtml = `
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <title>OpenHands API Documentation</title>
-  <link rel="stylesheet" type="text/css" href="./swagger-ui.css" />
-  <link rel="icon" type="image/png" href="./favicon-32x32.png" sizes="32x32" />
-  <link rel="icon" type="image/png" href="./favicon-16x16.png" sizes="16x16" />
-  <style>
-    html {
-      box-sizing: border-box;
-      overflow: -moz-scrollbars-vertical;
-      overflow-y: scroll;
-    }
-    
-    *,
-    *:before,
-    *:after {
-      box-sizing: inherit;
-    }
-    
-    body {
-      margin: 0;
-      background: #fafafa;
-    }
-  </style>
-</head>
-
-<body>
-  <div id="swagger-ui"></div>
-
-  <script src="./swagger-ui-bundle.js" charset="UTF-8"> </script>
-  <script src="./swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
-  <script>
-    window.onload = function() {
-      // Begin Swagger UI call region
-      const ui = SwaggerUIBundle({
-        url: "/openapi.json",
-        dom_id: '#swagger-ui',
-        deepLinking: true,
-        presets: [
-          SwaggerUIBundle.presets.apis,
-          SwaggerUIStandalonePreset
-        ],
-        plugins: [
-          SwaggerUIBundle.plugins.DownloadUrl
-        ],
-        layout: "StandaloneLayout"
-      });
-      // End Swagger UI call region
-      window.ui = ui;
-    };
-  </script>
-</body>
-</html>
-`;
-
-fs.writeFileSync(path.join(targetDir, 'index.html'), indexHtml);
-
-console.log('Swagger UI files generated successfully in static/swagger-ui/');
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -61,7 +61,7 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.cli
 ```

--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
@@ -21,18 +21,14 @@ OpenHands fournit un mode Interface Graphique (GUI) convivial pour interagir ave
 3. Entrez la `Clé API` correspondante pour le fournisseur choisi.
 4. Cliquez sur "Enregistrer" pour appliquer les paramètres.

-### Jetons de Contrôle de Version
-
-OpenHands prend en charge plusieurs fournisseurs de contrôle de version. Vous pouvez configurer des jetons pour plusieurs fournisseurs simultanément.
-
-#### Configuration du Jeton GitHub
+### Configuration du Jeton GitHub

 OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s'il est disponible. Cela peut se produire de deux manières :

 1. **Localement (OSS)** : L'utilisateur saisit directement son jeton GitHub
 2. **En ligne (SaaS)** : Le jeton est obtenu via l'authentification OAuth GitHub

-##### Configuration d'un Jeton GitHub Local
+#### Configuration d'un Jeton GitHub Local

 1. **Générer un Personal Access Token (PAT)** :
   - Allez dans Paramètres GitHub > Paramètres développeur > Personal Access Tokens > Tokens (classique)
@@ -44,11 +40,11 @@ OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s

 2. **Entrer le Jeton dans OpenHands** :
   - Cliquez sur le bouton Paramètres (icône d'engrenage) en haut à droite
-   - Accédez à la section "Git Provider Settings"
+   - Accédez à la section "GitHub"
   - Collez votre jeton dans le champ "Jeton GitHub"
   - Cliquez sur "Enregistrer" pour appliquer les modifications

-##### Politiques de Jetons Organisationnels
+#### Politiques de Jetons Organisationnels

 Si vous travaillez avec des dépôts organisationnels, une configuration supplémentaire peut être nécessaire :

@@ -63,7 +59,7 @@ Si vous travaillez avec des dépôts organisationnels, une configuration supplé
   - Si nécessaire, cliquez sur "Activer SSO" à côté de votre organisation
   - Terminez le processus d'autorisation SSO

-##### Authentification OAuth (Mode En Ligne)
+#### Authentification OAuth (Mode En Ligne)

 Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :

@@ -78,7 +74,7 @@ Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :
   - Autorisez OpenHands à accéder à votre compte GitHub
   - Si vous utilisez une organisation, autorisez l'accès à l'organisation si vous y êtes invité

-##### Dépannage
+#### Dépannage

 Problèmes courants et solutions :

@@ -99,43 +95,6 @@ Problèmes courants et solutions :
   - Vérifiez la console du navigateur pour tout message d'erreur
   - Utilisez le bouton "Tester la connexion" dans les paramètres s'il est disponible

-#### Configuration du Jeton GitLab
-
-OpenHands exporte automatiquement un `GITLAB_TOKEN` vers l'environnement shell, uniquement pour les installations locales, s'il est disponible.
-
-##### Configuration d'un Jeton GitLab
-
-1. **Générer un Personal Access Token (PAT)** :
-   - Sur GitLab, allez dans Paramètres utilisateur > Jetons d'accès
-   - Créez un nouveau jeton avec les portées suivantes :
-     - `api` (Accès API)
-     - `read_user` (Lecture des informations utilisateur)
-     - `read_repository` (Lecture du dépôt)
-     - `write_repository` (Écriture du dépôt)
-   - Définissez une date d'expiration ou laissez vide pour un jeton sans expiration
-
-2. **Entrer le Jeton dans OpenHands** :
-   - Cliquez sur le bouton Paramètres (icône d'engrenage)
-   - Accédez à la section `Git Provider Settings`
-   - Collez votre jeton dans le champ `Jeton GitLab`
-   - Si vous utilisez GitLab auto-hébergé, entrez l'URL de votre instance GitLab
-   - Cliquez sur `Enregistrer les modifications` pour appliquer les changements
-
-##### Dépannage
-
-Problèmes courants et solutions :
-
-1. **Jeton Non Reconnu** :
-   - Assurez-vous que le jeton est correctement enregistré dans les paramètres
-   - Vérifiez que le jeton n'a pas expiré
-   - Vérifiez que le jeton a les portées requises
-   - Pour les instances auto-hébergées, vérifiez l'URL correcte de l'instance
-
-2. **Accès Refusé** :
-   - Vérifiez les permissions d'accès au projet
-   - Vérifiez si le jeton possède les portées nécessaires
-   - Pour les dépôts de groupe/organisation, assurez-vous d'avoir les accès appropriés
-
 ### Paramètres Avancés

 1. Basculez sur `Options Avancées` pour accéder aux paramètres supplémentaires.
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -56,6 +56,6 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/installation.mdx
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/installation.mdx
@@ -13,16 +13,16 @@
 La façon la plus simple d'exécuter OpenHands est avec Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```

 Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/runtimes.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/runtimes.md
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
--- a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
+++ b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.cli
 ```

--- a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
+++ b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
@@ -16,11 +16,7 @@ OpenHandsは、AI アシスタントとやり取りするためのグラフィ
 3. 選択したプロバイダーに対応する`API Key`を入力します。
 4. `Save Changes`をクリックして設定を適用します。

-### バージョン管理トークン
-
-OpenHandsは複数のバージョン管理プロバイダーをサポートしています。複数のプロバイダーのトークンを同時に設定できます。
-
-#### GitHubトークンの設定
+### GitHubトークンの設定

 OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環境にエクスポートします。これは2つの方法で行われます。

@@ -38,7 +34,7 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
     - Minimal Permissions（検索用に**Meta Data = Read-only**を選択し、ブランチ作成用に**Pull Requests = Read and Write**、**Content = Read and Write**を選択します）
  2. **OpenHandsにトークンを入力**:
   - 設定ボタン（歯車アイコン）をクリックします。
-   - `Git Provider Settings`セクションに移動します。
+   - `GitHub Settings`セクションに移動します。
   - `GitHub Token`フィールドにトークンを貼り付けます。
   - `Save Changes`をクリックして変更を適用します。
 </details>
@@ -98,46 +94,6 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
   - 組織を使用している場合は、プロンプトが表示されたら組織へのアクセスを承認します。
 </details>

-#### GitLabトークンの設定
-
-OpenHandsは、利用可能な場合、ローカルインストールのみ、自動的に`GITLAB_TOKEN`をシェル環境にエクスポートします。
-
-<details>
-  <summary>GitLabトークンの設定</summary>
-
-  1. **Personal Access Token（PAT）の生成**:
-   - GitLabで、User Settings > Access Tokensに移動します。
-   - 以下のスコープを持つ新しいトークンを作成します:
-     - `api`（APIアクセス）
-     - `read_user`（ユーザー情報の読み取り）
-     - `read_repository`（リポジトリ読み取り）
-     - `write_repository`（リポジトリ書き込み）
-   - 有効期限を設定するか、無期限トークンの場合は空白のままにします。
-  2. **OpenHandsにトークンを入力**:
-   - 設定ボタン（歯車アイコン）をクリックします。
-   - `Git Provider Settings`セクションに移動します。
-   - `GitLab Token`フィールドにトークンを貼り付けます。
-   - セルフホスト型GitLabを使用している場合は、GitLabインスタンスのURLを入力します。
-   - `Save Changes`をクリックして変更を適用します。
-</details>
-
-<details>
-  <summary>トラブルシューティング</summary>
-
-  一般的な問題と解決策:
-
-  - **トークンが認識されない**:
-     - トークンが設定に正しく保存されていることを確認します。
-     - トークンの有効期限が切れていないことを確認します。
-     - トークンに必要なスコープがあることを確認します。
-     - セルフホスト型インスタンスの場合は、正しいインスタンスURLを確認します。
-
-  - **アクセスが拒否された**:
-     - プロジェクトのアクセス権限を確認します。
-     - トークンに必要なスコープがあるかどうかを確認します。
-     - グループ/組織のリポジトリの場合は、適切なアクセス権があることを確認します。
-</details>
-
 ### 高度な設定

 1. 設定ページ内で、`Advanced`オプションを切り替えて追加の設定にアクセスします。
--- a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
+++ b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -42,7 +42,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.main -t "write a bash script that prints hi"
 ```

--- a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/runtimes/docker.md
+++ b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/runtimes/docker.md
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー

    ```bash
    docker run # ...
-        -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+        -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
        -e SANDBOX_USER_ID=$(id -u) \
        -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
        -v $WORKSPACE_BASE:/opt/workspace_base \
@@ -82,5 +82,5 @@ docker network create openhands-network
 # 分離されたネットワークで OpenHands を実行
 docker run # ... \
    --network openhands-network \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```
--- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
+++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.cli
 ```

--- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
+++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
@@ -17,11 +17,7 @@ O OpenHands fornece um modo de Interface Gráfica do Usuário (GUI) para interag
 3. Insira a `Chave de API` correspondente para o provedor escolhido.
 4. Clique em `Salvar Alterações` para aplicar as configurações.

-### Tokens de Controle de Versão
-
-O OpenHands suporta múltiplos provedores de controle de versão. Você pode configurar tokens para vários provedores simultaneamente.
-
-#### Configuração do Token do GitHub
+### Configuração do Token do GitHub

 O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se ele estiver disponível. Isso pode acontecer de duas maneiras:

@@ -39,7 +35,7 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
     - Minimal Permissions (Selecione **Meta Data = Read-only** para pesquisa, **Pull Requests = Read and Write**, **Content = Read and Write** para criação de branches)
  2. **Insira o Token no OpenHands**:
   - Clique no botão Settings (ícone de engrenagem).
-   - Navegue até a seção `Git Provider Settings`.
+   - Navegue até a seção `GitHub Settings`.
   - Cole seu token no campo `GitHub Token`.
   - Clique em `Save Changes` para aplicar as alterações.
 </details>
@@ -99,46 +95,6 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
   - Se estiver usando uma organização, autorize o acesso à organização se solicitado.
 </details>

-#### Configuração do Token do GitLab
-
-O OpenHands exporta automaticamente um `GITLAB_TOKEN` para o ambiente shell, apenas para instalações locais, se ele estiver disponível.
-
-<details>
-  <summary>Configurando um Token do GitLab</summary>
-
-  1. **Gere um Personal Access Token (PAT)**:
-   - No GitLab, vá para User Settings > Access Tokens.
-   - Crie um novo token com os seguintes escopos:
-     - `api` (Acesso à API)
-     - `read_user` (Leitura de informações do usuário)
-     - `read_repository` (Leitura do repositório)
-     - `write_repository` (Escrita no repositório)
-   - Defina uma data de expiração ou deixe em branco para um token sem expiração.
-  2. **Insira o Token no OpenHands**:
-   - Clique no botão Settings (ícone de engrenagem).
-   - Navegue até a seção `Git Provider Settings`.
-   - Cole seu token no campo `GitLab Token`.
-   - Se estiver usando GitLab auto-hospedado, insira a URL da sua instância GitLab.
-   - Clique em `Save Changes` para aplicar as alterações.
-</details>
-
-<details>
-  <summary>Solução de Problemas</summary>
-
-  Problemas comuns e soluções:
-
-  - **Token Não Reconhecido**:
-     - Certifique-se de que o token esteja salvo corretamente nas configurações.
-     - Verifique se o token não expirou.
-     - Verifique se o token possui os escopos necessários.
-     - Para instâncias auto-hospedadas, verifique a URL correta da instância.
-
-  - **Acesso Negado**:
-     - Verifique as permissões de acesso ao projeto.
-     - Verifique se o token possui os escopos necessários.
-     - Para repositórios de grupo/organização, certifique-se de ter o acesso adequado.
-</details>
-
 ### Configurações Avançadas

 1. Dentro da página Settings, ative as opções `Advanced` para acessar configurações adicionais.
--- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
+++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.main -t "escreva um script bash que imprima oi"
 ```

--- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/installation.mdx
+++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/installation.mdx
@@ -58,17 +58,17 @@
 A maneira mais fácil de executar o OpenHands é no Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```

 Você encontrará o OpenHands em execução em http://localhost:3000!
--- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/runtimes.md
+++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/runtimes.md
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/cli-mode.md
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.cli
 ```

--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/gui-mode.md
@@ -19,18 +19,14 @@ OpenHands 提供了一个用户友好的图形用户界面（GUI）模式，用
 3. 输入所选提供商对应的 `API Key`。
 4. 点击"保存"应用设置。

-### 版本控制令牌
-
-OpenHands 支持多个版本控制提供商。您可以同时配置多个提供商的令牌。
-
-#### GitHub Token 设置
+### GitHub Token 设置

 如果可用，OpenHands 会自动将 `GITHUB_TOKEN` 导出到 shell 环境中。这可以通过两种方式实现：

 1. **本地（OSS）**：用户直接输入他们的 GitHub token
 2. **在线（SaaS）**：通过 GitHub OAuth 身份验证获取 token

-##### 设置本地 GitHub Token
+#### 设置本地 GitHub Token

 1. **生成个人访问令牌（PAT）**：
   - 转到 GitHub 设置 > 开发者设置 > 个人访问令牌 > 令牌（经典）
@@ -42,11 +38,11 @@ OpenHands 支持多个版本控制提供商。您可以同时配置多个提供

 2. **在 OpenHands 中输入令牌**：
   - 点击右上角的设置按钮（齿轮图标）
-   - 导航到"Git Provider Settings"部分
+   - 导航到"GitHub"部分
   - 将令牌粘贴到"GitHub Token"字段中
   - 点击"保存"应用更改

-##### 组织令牌策略
+#### 组织令牌策略

 如果您使用组织仓库，可能需要额外的设置：

@@ -61,7 +57,7 @@ OpenHands 支持多个版本控制提供商。您可以同时配置多个提供
   - 如果需要，点击组织旁边的"启用 SSO"
   - 完成 SSO 授权过程

-##### OAuth 身份验证（在线模式）
+#### OAuth 身份验证（在线模式）

 在在线模式下使用 OpenHands 时，GitHub OAuth 流程：

@@ -76,7 +72,7 @@ OpenHands 支持多个版本控制提供商。您可以同时配置多个提供
   - 授权 OpenHands 访问您的 GitHub 帐户
   - 如果使用组织，在出现提示时授权组织访问

-##### 故障排除
+#### 故障排除

 常见问题和解决方案：

@@ -97,43 +93,6 @@ OpenHands 支持多个版本控制提供商。您可以同时配置多个提供
   - 检查浏览器控制台中是否有任何错误消息
   - 如果可用，使用设置中的"测试连接"按钮

-#### GitLab Token 设置
-
-OpenHands 会自动将 `GITLAB_TOKEN` 导出到 shell 环境中，仅适用于本地安装，如果它可用的话。
-
-##### 设置 GitLab Token
-
-1. **生成个人访问令牌（PAT）**：
-   - 在 GitLab 中，转到用户设置 > 访问令牌
-   - 创建具有以下范围的新令牌：
-     - `api`（API 访问）
-     - `read_user`（读取用户信息）
-     - `read_repository`（读取仓库）
-     - `write_repository`（写入仓库）
-   - 设置过期日期或留空以获取永不过期的令牌
-
-2. **在 OpenHands 中输入令牌**：
-   - 点击设置按钮（齿轮图标）
-   - 导航到 `Git Provider Settings` 部分
-   - 将令牌粘贴到 `GitLab Token` 字段中
-   - 如果使用自托管 GitLab，请输入您的 GitLab 实例 URL
-   - 点击 `Save Changes` 应用更改
-
-##### 故障排除
-
-常见问题和解决方案：
-
-1. **令牌无法识别**：
-   - 确保令牌已正确保存在设置中
-   - 检查令牌是否已过期
-   - 验证令牌是否具有所需的范围
-   - 对于自托管实例，验证正确的实例 URL
-
-2. **访问被拒绝**：
-   - 验证项目访问权限
-   - 检查令牌是否具有必要的范围
-   - 对于组/组织仓库，确保您拥有适当的访问权限
-
 ### 高级设置

 1. 切换`高级选项`以访问其他设置。
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/how-to/headless-mode.md
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -57,6 +57,6 @@ docker run -it \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
 ```
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/installation.mdx
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/installation.mdx
@@ -11,16 +11,16 @@
 在 Docker 中运行 OpenHands 是最简单的方式。

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```

 你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands，作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode)，或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/runtimes.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/runtimes.md
@@ -11,7 +11,7 @@

 ```
 docker run # ...
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -v /var/run/docker.sock:/var/run/docker.sock \
    # ...
 ```
--- a/docs/modules/usage/cloud/openhands-cloud.mdx
+++ b/docs/modules/usage/cloud/openhands-cloud.mdx
@@ -8,22 +8,18 @@ OpenHands Cloud can be accessed at https://app.all-hands.dev/.

 ## Getting Started

-After visiting OpenHands Cloud, you will be asked to connect with your GitHub or GitLab account:
-
-1. After reading and accepting the terms of service, click `Log in with GitHub` or `Log in with GitLab`.
+After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
+1. After reading and accepting the terms of service, click `Connect to GitHub`.
 2. Review the permissions requested by OpenHands and then click `Authorize OpenHands AI`.
-   - OpenHands will require some permissions from your GitHub or GitLab account. To read more about these permissions:
-     - GitHub: You can click the `Learn more` link on the GitHub authorize page.
-     - GitLab: You can expand each permission request on the GitLab authorize page.
+   - OpenHands will require some permissions from your GitHub account. To read more about these permissions,
+     you can click the `Learn more` link on the GitHub authorize page.

 ## Repository Access

-### GitHub
-
-#### Adding Repository Access
+### Adding Repository Access

 You can grant OpenHands specific repository access:
-1. Click `Add GitHub repos` on the Home page.
+1. Click the `Select a GitHub project` dropdown, then select `Add more repositories...`.
 2. Select the organization, then choose the specific repositories to grant OpenHands access to.
   <details>
     <summary>Permission Details for Repository Access</summary>
@@ -46,15 +42,11 @@ You can grant OpenHands specific repository access:

 3. Click on `Install & Authorize`.

-#### Modifying Repository Access
+### Modifying Repository Access

-You can modify GitHub repository access at any time by:
-* Using the same `Add GitHub repos` workflow, or
-* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `Git Settings` section.
-
-### GitLab
-
-When using your GitLab account, OpenHands will automatically have access to your repositories.
+You can modify repository access at any time by:
+* Using the same `Select a GitHub project > Add more repositories` workflow, or
+* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.

 ## Conversation Persistence

--- a/docs/modules/usage/how-to/cli-mode.md
+++ b/docs/modules/usage/how-to/cli-mode.md
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.cli
 ```

--- a/docs/modules/usage/how-to/custom-sandbox-guide.md
+++ b/docs/modules/usage/how-to/custom-sandbox-guide.md
@@ -1,8 +1,7 @@
 # Custom Sandbox

 :::note
-This guide is for users that would like to use their own custom Docker image for the runtime. For example
-with certain tools or programming languages pre-installed.
+This guide is for users that would like to use their own custom Docker image for the runtime, e.g. with certain tools or programming languages pre-installed.
 :::

 The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
--- a/docs/modules/usage/how-to/gui-mode.md
+++ b/docs/modules/usage/how-to/gui-mode.md
@@ -18,14 +18,11 @@ OpenHands provides a Graphical User Interface (GUI) mode for interacting with th
 3. Enter the corresponding `API Key` for your chosen provider.
 4. Click `Save Changes` to apply the settings.

-### Version Control Tokens
+### GitHub Token Setup

-OpenHands supports multiple version control providers. You can configure tokens for multiple providers simultaneously.
-
-#### GitHub Token Setup
-
-OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if provided:
+OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:

+**Local Installation**: The user directly inputs their GitHub token.
 <details>
  <summary>Setting Up a GitHub Token</summary>

@@ -39,8 +36,9 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if pro
     - Minimal Permissions ( Select `Meta Data = Read-only` read for search, `Pull Requests = Read and Write` and `Content = Read and Write` for branch creation)
  2. **Enter Token in OpenHands**:
   - Click the Settings button (gear icon).
+   - Navigate to the `GitHub Settings` section.
   - Paste your token in the `GitHub Token` field.
-   - Click `Save` to apply the changes.
+   - Click `Save Changes` to apply the changes.
 </details>

 <details>
@@ -81,43 +79,21 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if pro
     - Check the browser console for any error messages.
 </details>

-#### GitLab Token Setup
-
-OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if provided:
+**OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.

 <details>
-  <summary>Setting Up a GitLab Token</summary>
+  <summary>OAuth Authentication</summary>

-  1. **Generate a Personal Access Token (PAT)**:
-   - On GitLab, go to User Settings > Access Tokens.
-   - Create a new token with the following scopes:
-     - `api` (API access)
-     - `read_user` (Read user information)
-     - `read_repository` (Read repository)
-     - `write_repository` (Write repository)
-   - Set an expiration date or leave it blank for a non-expiring token.
-  2. **Enter Token in OpenHands**:
-   - Click the Settings button (gear icon).
-   - Paste your token in the `GitLab Token` field.
-   - Enter your GitLab instance URL if using self-hosted GitLab.
-   - Click `Save` to apply the changes.
-</details>
+  When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
+   - Repository access (read/write)
+   - Workflow management
+   - Organization read access

-<details>
-  <summary>Troubleshooting</summary>
-
-  Common issues and solutions:
-
-  - **Token Not Recognized**:
-     - Ensure the token is properly saved in settings.
-     - Check that the token hasn't expired.
-     - Verify the token has the required scopes.
-     - For self-hosted instances, verify the correct instance URL.
-
-  - **Access Denied**:
-     - Verify project access permissions.
-     - Check if the token has the necessary scopes.
-     - For group/organization repositories, ensure you have proper access.
+  To authenticate OpenHands:
+   - Click `Sign in with GitHub` when prompted.
+   - Review the requested permissions.
+   - Authorize OpenHands to access your GitHub account.
+   - If using an organization, authorize organization access if prompted.
 </details>

 ### Advanced Settings
--- a/docs/modules/usage/how-to/headless-mode.md
+++ b/docs/modules/usage/how-to/headless-mode.md
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
 ```bash
 docker run -it \
    --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e SANDBOX_USER_ID=$(id -u) \
    -e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
    -e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
    -v ~/.openhands-state:/.openhands-state \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app-$(date +%Y%m%d%H%M%S) \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34 \
+    docker.all-hands.dev/all-hands-ai/openhands:0.32 \
    python -m openhands.core.main -t "write a bash script that prints hi"
 ```

--- a/docs/modules/usage/installation.mdx
+++ b/docs/modules/usage/installation.mdx
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
 The easiest way to run OpenHands is in Docker.

 ```bash
-docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
+docker pull docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik

 docker run -it --rm --pull=always \
-    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
+    -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.32-nikolaik \
    -e LOG_ALL_EVENTS=true \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ~/.openhands-state:/.openhands-state \
    -p 3000:3000 \
    --add-host host.docker.internal:host-gateway \
    --name openhands-app \
-    docker.all-hands.dev/all-hands-ai/openhands:0.34
+    docker.all-hands.dev/all-hands-ai/openhands:0.32
 ```

 You'll find OpenHands running at http://localhost:3000!
--- a/docs/modules/usage/key-features.md
+++ b/docs/modules/usage/key-features.md
@@ -6,26 +6,23 @@
 - Displays the conversation between the user and OpenHands.
 - OpenHands explains its actions in this panel.

-### Changes
- Shows the file changes performed by OpenHands.
-
 ### Workspace
 - Browse project files and directories.
 - Use the `Open in VS Code` option to:
  * Modify files
  * Upload and download files

-### Terminal
- A space for OpenHands and users to run terminal commands.
-
 ### Jupyter
 - Shows all Python commands that were executed by OpenHands.
 - Particularly handy when using OpenHands to perform data visualization tasks.

 ### App
- Displays the web server when OpenHands runs an application.
+- Shows the web server when OpenHands runs an application.
 - Users can interact with the running application.

 ### Browser
 - Used by OpenHands to browse websites.
 - The browser is non-interactive.
+
+### Terminal
+- A space for OpenHands and users to run terminal commands.
--- a/docs/modules/usage/llms/llms.md
+++ b/docs/modules/usage/llms/llms.md
@@ -17,8 +17,6 @@ Based on these findings and community feedback, the following models have been v
 - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
 - [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
 - [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
 - [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)


--- a/docs/modules/usage/llms/local-llms.md
+++ b/docs/modules/usage/llms/local-llms.md
@@ -15,7 +15,7 @@ It is highly recommended that you use GPUs to serve local models for optimal exp
 For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1):

 ```bash
-huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/openhands-lm-32b-v0.1
+huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/openhands-lm-32b-v0.1
 ```

 ## Create an OpenAI-Compatible Endpoint With a Model Serving Framework
@@ -27,7 +27,7 @@ huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/o

 ```bash
 SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
-    --model all-hands/openhands-lm-32b-v0.1 \
+    --model my_folder/openhands-lm-32b-v0.1 \
    --served-model-name openhands-lm-32b-v0.1 \
    --port 8000 \
    --tp 2 --dp 1 \
@@ -41,7 +41,7 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
 - Example launch command for OpenHands LM 32B (with at least 2 GPUs):

 ```bash
-vllm serve all-hands/openhands-lm-32b-v0.1 \
+vllm serve my_folder/openhands-lm-32b-v0.1 \
    --host 0.0.0.0 --port 8000 \
    --api-key mykey \
    --tensor-parallel-size 2 \
@@ -67,7 +67,7 @@ Ensure `config.toml` exists by running `make setup-config` which will create one
 workspace_base="/path/to/your/workspace"

 [llm]
-model="openhands-lm-32b-v0.1"
+embedding_model="local"
 ollama_base_url="http://localhost:8000"
 ```

--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -24,8 +24,6 @@
        "@docusaurus/module-type-aliases": "^3.5.1",
        "@docusaurus/tsconfig": "^3.7.0",
        "@docusaurus/types": "^3.5.1",
-        "swagger-cli": "^4.0.4",
-        "swagger-ui-dist": "^5.21.0",
        "typescript": "~5.8.3"
      },
      "engines": {
@@ -275,273 +273,6 @@
        "node": ">=6.0.0"
      }
    },
-    "node_modules/@apidevtools/openapi-schemas": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
-      "integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/@apidevtools/swagger-cli/-/swagger-cli-4.0.4.tgz",
-      "integrity": "sha512-hdDT3B6GLVovCsRZYDi3+wMcB1HfetTU20l2DC8zD3iFRNMC6QNAZG5fo/6PYeHWBEv7ri4MvnlKodhNB0nt7g==",
-      "deprecated": "This package has been abandoned. Please switch to using the actively maintained @redocly/cli",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@apidevtools/swagger-parser": "^10.0.1",
-        "chalk": "^4.1.0",
-        "js-yaml": "^3.14.0",
-        "yargs": "^15.4.1"
-      },
-      "bin": {
-        "swagger-cli": "bin/swagger-cli.js"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/argparse": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
-      "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "sprintf-js": "~1.0.2"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/camelcase": {
-      "version": "5.3.1",
-      "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
-      "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/cliui": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
-      "integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "string-width": "^4.2.0",
-        "strip-ansi": "^6.0.0",
-        "wrap-ansi": "^6.2.0"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/emoji-regex": {
-      "version": "8.0.0",
-      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
-      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
-      "dev": true,
-      "license": "MIT"
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/find-up": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz",
-      "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "locate-path": "^5.0.0",
-        "path-exists": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/js-yaml": {
-      "version": "3.14.1",
-      "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz",
-      "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "argparse": "^1.0.7",
-        "esprima": "^4.0.0"
-      },
-      "bin": {
-        "js-yaml": "bin/js-yaml.js"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/locate-path": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz",
-      "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "p-locate": "^4.1.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/p-limit": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz",
-      "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "p-try": "^2.0.0"
-      },
-      "engines": {
-        "node": ">=6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/p-locate": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
-      "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "p-limit": "^2.2.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/path-exists": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
-      "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/string-width": {
-      "version": "4.2.3",
-      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
-      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "emoji-regex": "^8.0.0",
-        "is-fullwidth-code-point": "^3.0.0",
-        "strip-ansi": "^6.0.1"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/wrap-ansi": {
-      "version": "6.2.0",
-      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
-      "integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "ansi-styles": "^4.0.0",
-        "string-width": "^4.1.0",
-        "strip-ansi": "^6.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/y18n": {
-      "version": "4.0.3",
-      "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.3.tgz",
-      "integrity": "sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==",
-      "dev": true,
-      "license": "ISC"
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/yargs": {
-      "version": "15.4.1",
-      "resolved": "https://registry.npmjs.org/yargs/-/yargs-15.4.1.tgz",
-      "integrity": "sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "cliui": "^6.0.0",
-        "decamelize": "^1.2.0",
-        "find-up": "^4.1.0",
-        "get-caller-file": "^2.0.1",
-        "require-directory": "^2.1.1",
-        "require-main-filename": "^2.0.0",
-        "set-blocking": "^2.0.0",
-        "string-width": "^4.2.0",
-        "which-module": "^2.0.0",
-        "y18n": "^4.0.0",
-        "yargs-parser": "^18.1.2"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
-    "node_modules/@apidevtools/swagger-cli/node_modules/yargs-parser": {
-      "version": "18.1.3",
-      "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
-      "integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "camelcase": "^5.0.0",
-        "decamelize": "^1.2.0"
-      },
-      "engines": {
-        "node": ">=6"
-      }
-    },
-    "node_modules/@apidevtools/swagger-methods": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
-      "integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg==",
-      "dev": true,
-      "license": "MIT"
-    },
-    "node_modules/@apidevtools/swagger-parser": {
-      "version": "10.1.1",
-      "resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.1.1.tgz",
-      "integrity": "sha512-u/kozRnsPO/x8QtKYJOqoGtC4kH6yg1lfYkB9Au0WhYB0FNLpyFusttQtvhlwjtG3rOwiRz4D8DnnXa8iEpIKA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@apidevtools/json-schema-ref-parser": "11.7.2",
-        "@apidevtools/openapi-schemas": "^2.1.0",
-        "@apidevtools/swagger-methods": "^3.0.2",
-        "@jsdevtools/ono": "^7.1.3",
-        "ajv": "^8.17.1",
-        "ajv-draft-04": "^1.0.0",
-        "call-me-maybe": "^1.0.2"
-      },
-      "peerDependencies": {
-        "openapi-types": ">=7"
-      }
-    },
-    "node_modules/@apidevtools/swagger-parser/node_modules/@apidevtools/json-schema-ref-parser": {
-      "version": "11.7.2",
-      "resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-11.7.2.tgz",
-      "integrity": "sha512-4gY54eEGEstClvEkGnwVkTkrx0sqwemEFG5OSRRn3tD91XH0+Q8XIkYIfo7IwEWPpJZwILb9GUXeShtplRc/eA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@jsdevtools/ono": "^7.1.3",
-        "@types/json-schema": "^7.0.15",
-        "js-yaml": "^4.1.0"
-      },
-      "engines": {
-        "node": ">= 16"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/philsturgeon"
-      }
-    },
    "node_modules/@babel/code-frame": {
      "version": "7.26.2",
      "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
@@ -4104,13 +3835,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.14"
      }
    },
-    "node_modules/@jsdevtools/ono": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
-      "integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==",
-      "dev": true,
-      "license": "MIT"
-    },
    "node_modules/@leichtgewicht/ip-codec": {
      "version": "2.0.5",
      "resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz",
@@ -4246,14 +3970,6 @@
      "integrity": "sha512-8LduaNlMZGwdZ6qWrKlfa+2M4gahzFkprZiAt2TF8uS0qQgBizKXpXURqvTJ4WtmupWxaLqjRb2UCTe72mu+Aw==",
      "license": "MIT"
    },
-    "node_modules/@scarf/scarf": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
-      "integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
-      "dev": true,
-      "hasInstallScript": true,
-      "license": "Apache-2.0"
-    },
    "node_modules/@sideway/address": {
      "version": "4.1.5",
      "resolved": "https://registry.npmjs.org/@sideway/address/-/address-4.1.5.tgz",
@@ -5251,21 +4967,6 @@
        "url": "https://github.com/sponsors/epoberezkin"
      }
    },
-    "node_modules/ajv-draft-04": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz",
-      "integrity": "sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw==",
-      "dev": true,
-      "license": "MIT",
-      "peerDependencies": {
-        "ajv": "^8.5.0"
-      },
-      "peerDependenciesMeta": {
-        "ajv": {
-          "optional": true
-        }
-      }
-    },
    "node_modules/ajv-formats": {
      "version": "2.1.1",
      "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz",
@@ -5848,13 +5549,6 @@
        "url": "https://github.com/sponsors/ljharb"
      }
    },
-    "node_modules/call-me-maybe": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
-      "integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ==",
-      "dev": true,
-      "license": "MIT"
-    },
    "node_modules/callsites": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -7498,16 +7192,6 @@
        }
      }
    },
-    "node_modules/decamelize": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
-      "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
    "node_modules/decode-named-character-reference": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz",
@@ -8916,16 +8600,6 @@
        "node": ">=6.9.0"
      }
    },
-    "node_modules/get-caller-file": {
-      "version": "2.0.5",
-      "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
-      "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
-      "dev": true,
-      "license": "ISC",
-      "engines": {
-        "node": "6.* || 8.* || >= 10.*"
-      }
-    },
    "node_modules/get-intrinsic": {
      "version": "1.2.7",
      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.7.tgz",
@@ -13436,16 +13110,15 @@
      }
    },
    "node_modules/nanoid": {
-      "version": "3.3.11",
-      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz",
-      "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==",
+      "version": "3.3.7",
+      "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz",
+      "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/ai"
        }
      ],
-      "license": "MIT",
      "bin": {
        "nanoid": "bin/nanoid.cjs"
      },
@@ -13754,14 +13427,6 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
-    "node_modules/openapi-types": {
-      "version": "12.1.3",
-      "resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
-      "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
-      "dev": true,
-      "license": "MIT",
-      "peer": true
-    },
    "node_modules/opener": {
      "version": "1.5.2",
      "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
@@ -14144,9 +13809,9 @@
      }
    },
    "node_modules/postcss": {
-      "version": "8.4.49",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.49.tgz",
-      "integrity": "sha512-OCVPnIObs4N29kxTjzLfUryOkvZEq+pf8jTF0lg8E7uETuWHA+v7j3c/xJmiqpX450191LlmZfUKkXxkTry7nA==",
+      "version": "8.4.38",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.38.tgz",
+      "integrity": "sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==",
      "funding": [
        {
          "type": "opencollective",
@@ -14161,11 +13826,10 @@
          "url": "https://github.com/sponsors/ai"
        }
      ],
-      "license": "MIT",
      "dependencies": {
        "nanoid": "^3.3.7",
-        "picocolors": "^1.1.1",
-        "source-map-js": "^1.2.1"
+        "picocolors": "^1.0.0",
+        "source-map-js": "^1.2.0"
      },
      "engines": {
        "node": "^10 || ^12 || >=14"
@@ -15703,15 +15367,6 @@
        "node": ">= 0.10"
      }
    },
-    "node_modules/punycode": {
-      "version": "2.3.1",
-      "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
-      "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=6"
-      }
-    },
    "node_modules/pupa": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/pupa/-/pupa-3.1.0.tgz",
@@ -16550,16 +16205,6 @@
        "node": ">=0.10"
      }
    },
-    "node_modules/require-directory": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
-      "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
    "node_modules/require-from-string": {
      "version": "2.0.2",
      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
@@ -16577,13 +16222,6 @@
        "node": "*"
      }
    },
-    "node_modules/require-main-filename": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
-      "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==",
-      "dev": true,
-      "license": "ISC"
-    },
    "node_modules/requires-port": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
@@ -17064,13 +16702,6 @@
        "node": ">= 0.8.0"
      }
    },
-    "node_modules/set-blocking": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz",
-      "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==",
-      "dev": true,
-      "license": "ISC"
-    },
    "node_modules/set-function-length": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
@@ -17349,10 +16980,9 @@
      }
    },
    "node_modules/source-map-js": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
-      "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
-      "license": "BSD-3-Clause",
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz",
+      "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==",
      "engines": {
        "node": ">=0.10.0"
      }
@@ -17717,32 +17347,6 @@
      "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
      "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA=="
    },
-    "node_modules/swagger-cli": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/swagger-cli/-/swagger-cli-4.0.4.tgz",
-      "integrity": "sha512-Cp8YYuLny3RJFQ4CvOBTaqmOOgYsem52dPx1xM5S4EUWFblIh2Q8atppMZvXKUr1e9xH5RwipYpmdUzdPcxWcA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@apidevtools/swagger-cli": "4.0.4"
-      },
-      "bin": {
-        "swagger-cli": "swagger-cli.js"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/swagger-ui-dist": {
-      "version": "5.21.0",
-      "resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.21.0.tgz",
-      "integrity": "sha512-E0K3AB6HvQd8yQNSMR7eE5bk+323AUxjtCz/4ZNKiahOlPhPJxqn3UPIGs00cyY/dhrTDJ61L7C/a8u6zhGrZg==",
-      "dev": true,
-      "license": "Apache-2.0",
-      "dependencies": {
-        "@scarf/scarf": "=1.4.0"
-      }
-    },
    "node_modules/tapable": {
      "version": "2.2.1",
      "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
@@ -18345,6 +17949,14 @@
        "punycode": "^2.1.0"
      }
    },
+    "node_modules/uri-js/node_modules/punycode": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
+      "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
    "node_modules/url-loader": {
      "version": "4.1.1",
      "resolved": "https://registry.npmjs.org/url-loader/-/url-loader-4.1.1.tgz",
@@ -18998,13 +18610,6 @@
        "node": ">= 8"
      }
    },
-    "node_modules/which-module": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.1.tgz",
-      "integrity": "sha512-iBdZ57RDvnOR9AGBhML2vFZf7h8vmBjhoaZqODJBFWHVtKkDmKuHai3cx5PgVMrX5YDNp27AofYbAwctSS+vhQ==",
-      "dev": true,
-      "license": "ISC"
-    },
    "node_modules/widest-line": {
      "version": "4.0.1",
      "resolved": "https://registry.npmjs.org/widest-line/-/widest-line-4.0.1.tgz",
--- a/docs/package.json
+++ b/docs/package.json
@@ -4,18 +4,16 @@
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
-    "start": "node generate-swagger-ui.js && docusaurus start",
-    "build": "node generate-swagger-ui.js && docusaurus build",
+    "start": "docusaurus start",
+    "build": "docusaurus build",
    "swizzle": "docusaurus swizzle",
    "deploy": "docusaurus deploy",
    "clear": "docusaurus clear",
    "serve": "docusaurus serve",
    "write-translations": "docusaurus write-translations",
    "write-heading-ids": "docusaurus write-heading-ids",
-    "typecheck": "tsc",
-    "generate-swagger-ui": "node generate-swagger-ui.js"
+    "typecheck": "tsc"
  },
-  "// Note": "The OpenAPI spec is stored in docs/static/openapi.json so it's accessible at /openapi.json in the deployed site",
  "dependencies": {
    "@docusaurus/core": "^3.7.0",
    "@docusaurus/plugin-content-pages": "^3.7.0",
@@ -33,8 +31,6 @@
    "@docusaurus/module-type-aliases": "^3.5.1",
    "@docusaurus/tsconfig": "^3.7.0",
    "@docusaurus/types": "^3.5.1",
-    "swagger-cli": "^4.0.4",
-    "swagger-ui-dist": "^5.21.0",
    "typescript": "~5.8.3"
  },
  "browserslist": {
@@ -51,6 +47,5 @@
  },
  "engines": {
    "node": ">=18.0"
-  },
-  "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
+  }
 }
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -268,4 +268,4 @@ const sidebars: SidebarsConfig = {
  ],
 };

-export default sidebars;
+export default sidebars;
--- a/docs/static/README.md
+++ b/docs/static/README.md
@@ -1,15 +0,0 @@
-# Static Files for OpenHands Documentation
-
-This directory contains static files that are copied directly to the build output of the Docusaurus documentation.
-
-## OpenAPI Specification
-
-The `openapi.json` file in this directory is the OpenAPI specification for the OpenHands API. It is copied to the build output and is accessible at `/openapi.json` in the deployed site.
-
-This file is used by the Swagger UI interface, which is accessible at `/swagger-ui/` in the deployed site.
-
-## Why is the OpenAPI spec in the static directory?
-
-The OpenAPI specification is placed in the static directory so that it's accessible at a predictable URL in the deployed site. This allows the Swagger UI to reference it directly.
-
-We only need one copy of the OpenAPI spec file, which is this one in the static directory.
--- a/docs/static/img/oh-features.png
+++ b/docs/static/img/oh-features.png
--- a/docs/static/openapi.json
+++ b/docs/static/openapi.json
--- a/docs/yarn.lock
+++ b/docs/yarn.lock
--- a/evaluation/benchmarks/EDA/run_infer.py
+++ b/evaluation/benchmarks/EDA/run_infer.py
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -74,6 +75,7 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/agent_bench/run_infer.py
+++ b/evaluation/benchmarks/agent_bench/run_infer.py
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -55,6 +56,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/aider_bench/run_infer.py
+++ b/evaluation/benchmarks/aider_bench/run_infer.py
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -61,6 +62,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False

    # copy 'draft_editor' config if exists
--- a/evaluation/benchmarks/biocoder/run_infer.py
+++ b/evaluation/benchmarks/biocoder/run_infer.py
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -72,6 +73,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/bird/run_infer.py
+++ b/evaluation/benchmarks/bird/run_infer.py
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -86,6 +87,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/browsing_delegation/run_infer.py
+++ b/evaluation/benchmarks/browsing_delegation/run_infer.py
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -50,6 +51,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/commit0/run_infer.py
+++ b/evaluation/benchmarks/commit0/run_infer.py
@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
    update_llm_config_for_completions_logging,
 )
 from openhands.controller.state.state import State
@@ -135,6 +136,7 @@ def get_config(
        enable_browsing=RUN_WITH_BROWSING,
        enable_llm_editor=False,
    )
+    agent_config = update_agent_config_for_eval(agent_config)
    config.set_agent_config(agent_config)
    return config

--- a/evaluation/benchmarks/discoverybench/run_infer.py
+++ b/evaluation/benchmarks/discoverybench/run_infer.py
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -76,6 +77,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    agent_config = AgentConfig(
        function_calling=False,
--- a/evaluation/benchmarks/gaia/run_infer.py
+++ b/evaluation/benchmarks/gaia/run_infer.py
@@ -18,6 +18,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -66,6 +67,8 @@ def get_config(
    else:
        logger.info('Agent config not provided, using default settings')
        agent_config = config.get_agent_config(metadata.agent_class)
+
+        agent_config = update_agent_config_for_eval(agent_config)
        agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/gorilla/run_infer.py
+++ b/evaluation/benchmarks/gorilla/run_infer.py
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -54,6 +55,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/gpqa/run_infer.py
+++ b/evaluation/benchmarks/gpqa/run_infer.py
@@ -34,6 +34,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -75,6 +76,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/humanevalfix/run_infer.py
+++ b/evaluation/benchmarks/humanevalfix/run_infer.py
@@ -27,6 +27,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -96,6 +97,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/lca_ci_build_repair/.gitignore
+++ b/evaluation/benchmarks/lca_ci_build_repair/.gitignore
@@ -1 +0,0 @@
-config.yaml
--- a/evaluation/benchmarks/lca_ci_build_repair/README.MD
+++ b/evaluation/benchmarks/lca_ci_build_repair/README.MD
@@ -1,35 +0,0 @@
-# CI Builds Repair Benchmark Integration
-
-This module integrates the CI Builds Repair benchmark developed by [JetBrains-Research](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark).
-
-For more information, refer to the [GitHub repository](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark) and the associated [research paper](https://arxiv.org/abs/2406.11612).
-See notice below for details
-
-## Setup
-
-Before running any scripts, make sure to configure the benchmark by setting up `config.yaml`.
-This benchmark pushes to JetBrains' private GitHub repository. You will need to request a `token_gh` from their team in order to run this benchmark.
-
-## Inference
-
-To run inference with your model:
-
-```bash
-./evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh llm.yourmodel
-```
-
-## Evaluation
-
-To evaluate the predictions:
-
-```bash
-./evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh predictions_path_containing_output
-```
-
-## Results
-The benchmark contains 68 instances. We skip instances #126 and #145 due to dockerization errors and run only the remaining 66 instances.
-
-Due to running in live GitHub machines, the benchmark is sensitive to the date it is run. Even the golden patches in the dataset might present failures due to updates.
-For example, on 2025-04-09, running the benchmark against the golden patches gave 57/67 successes, with 1 job left in the waiting list.
-
-On 2025-04-10, running the full benchmark with OH and no oracle, 37 instances succeeded. That is 54% of the complete set of 68 instances and 64% of the 57 that succeed with golden patches.
--- a/evaluation/benchmarks/lca_ci_build_repair/config_template.yaml
+++ b/evaluation/benchmarks/lca_ci_build_repair/config_template.yaml
@@ -1,11 +0,0 @@
-LCA_PATH: path # where to clone the lca-ci repo
-model_name: OpenHands
-benchmark_owner: ICML-25-BenchName-builds-repair
-token_gh: your_token
-#for lca-ci-repo
-repos_folder: /path/to/repos # here the cloned repos would be stored
-out_folder: /out/folder # here the result files would be stored
-data_cache_dir: /data/cache/dir/ # here the cached dataset would be stored
-username_gh: username-gh # your GitHub username
-# test_username: test_user # username that would be displayed in the benchmark. Optional. If omitted, username_gh would be used
-language: Python # dataset language (now only Python is available)
--- a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
+++ b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
@@ -1,242 +0,0 @@
-"""Implements evaluation on JetBrains CI builds repair baselines
-
-Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
-and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
-
-TODOs:
- Add more flags
-"""
-
-import json
-import os
-from pathlib import Path
-
-import ruamel.yaml
-
-from evaluation.utils.shared import (
-    EvalMetadata,
-    get_default_sandbox_config_for_eval,
-    make_metadata,
-)
-from openhands.core.config import (
-    AppConfig,
-    LLMConfig,
-    get_parser,
-    load_app_config,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.main import create_runtime
-from openhands.events.action import CmdRunAction
-from openhands.events.observation import CmdOutputObservation
-from openhands.runtime.base import Runtime
-from openhands.utils.async_utils import call_async_from_sync
-
-
-def get_config(
-    metadata: EvalMetadata,
-) -> AppConfig:
-    sandbox_config = get_default_sandbox_config_for_eval()
-    sandbox_config.base_container_image = 'python:3.12-bookworm'
-    config = AppConfig(
-        default_agent=metadata.agent_class,
-        run_as_openhands=False,
-        runtime='docker',
-        max_iterations=metadata.max_iterations,
-        sandbox=sandbox_config,
-        # do not mount workspace
-        workspace_base=None,
-        workspace_mount_path=None,
-    )
-    config.set_llm_config(metadata.llm_config)
-    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.enable_prompt_extensions = False
-    return config
-
-
-config = load_app_config()
-
-
-def load_bench_config():
-    script_dir = os.path.dirname(
-        os.path.abspath(__file__)
-    )  # Get the absolute path of the script
-    config_path = os.path.join(script_dir, 'config.yaml')
-    yaml = ruamel.yaml.YAML(typ='rt')
-    with open(config_path, 'r') as file:
-        return yaml.load(file)
-
-
-bench_config = load_bench_config()
-
-
-def run_eval(
-    runtime: Runtime,
-):
-    """Run the evaluation and create report"""
-    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
-    obs: CmdOutputObservation
-
-    lca_path = bench_config['LCA_PATH']
-    lca_ci_path = os.path.join(
-        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
-    )
-
-    model_name = bench_config['model_name']
-
-    action = CmdRunAction(command=f'mkdir {lca_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command=f'cd {lca_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
-    action = CmdRunAction(command=f'git clone {lca_repo_url}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command=f'cd {lca_ci_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command='git switch open-hands-integration')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    script_dir = os.path.dirname(
-        os.path.abspath(__file__)
-    )  # Get the absolute path of the script
-    config_path = os.path.join(script_dir, 'config.yaml')
-    runtime.copy_to(config_path, lca_ci_path)
-
-    token_gh = bench_config['token_gh']
-    commandf = f'export TOKEN_GH={token_gh}'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    action = CmdRunAction(command='poetry install')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    # Set up the task environment
-    commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl"  > /tmp/single_output.txt'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(f'run_eval_jobs.py gave {obs.content} !')
-    # assert obs.exit_code == 0
-
-    commandf = 'cat /tmp/single_output.txt'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(f' {commandf} gave {obs.content}!')
-
-    testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
-    commandf = f'cat {testfile_path}'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    report_str = obs.content
-
-    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
-    return report_str
-
-
-def process_predictions(predictions_path: str):
-    output_path = Path(predictions_path)
-    if output_path.suffix != '.jsonl':
-        raise ValueError('output_path must end in .jsonl')
-
-    output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
-
-    with output_path.open() as infile, output_lca_path.open('w') as outfile:
-        for line in infile:
-            data = json.loads(line)
-            json.dump(data.get('test_result'), outfile)
-            outfile.write('\n')
-
-    return str(output_lca_path)
-
-
-if __name__ == '__main__':
-    parser = get_parser()
-    parser.add_argument(
-        '-s',
-        '--eval-split',
-        type=str,
-        default='test',
-        choices=['test'],
-        help='data split to evaluate on, must be test',
-    )
-    parser.add_argument(
-        '--predictions-path',
-        type=str,
-        help='Path to the directory containing the output.jsonl with the predictions.',
-    )
-    args, _ = parser.parse_known_args()
-
-    data_split = args.eval_split
-
-    llm_config = LLMConfig(model='dummy_model')
-
-    metadata = make_metadata(
-        llm_config,
-        f'jetbrains-lca-ci--{data_split}',
-        args.agent_cls,
-        args.max_iterations,
-        args.eval_note,
-        args.predictions_path,
-    )
-
-    # prepare image
-    config = get_config(metadata)
-    runtime = create_runtime(config)
-    call_async_from_sync(runtime.connect)
-    logger.info('Converting output.jsonl into output_lca.jsonl')
-    predictions_lca_path = process_predictions(
-        os.path.join(args.predictions_path, 'output.jsonl')
-    )
-    runtime.copy_to(predictions_lca_path, '/tmp')
-
-    # get results
-    results_str = run_eval(runtime)
-    results_path = os.path.join(args.predictions_path, 'results.jsonl')
-    with open(results_path, 'w') as file:
-        file.write(results_str)
-    logger.info(f'Saved results to {results_path}')
-
-    # make a summary
-    resolved_instances = []
-    unresolved_instances = []
-    for line in results_str.strip().splitlines():
-        data = json.loads(line)
-        conclusion = data.get('conclusion')
-        if conclusion == 'success':
-            resolved_instances.append(data)
-        elif conclusion == 'failure':
-            unresolved_instances.append(data)
-
-    completed_instances = resolved_instances + unresolved_instances
-
-    report = {
-        'success': len(resolved_instances),
-        'failure': len(unresolved_instances),
-        'resolved_instances': resolved_instances,
-        'unresolved_instances': unresolved_instances,
-        'completed_instances': completed_instances,
-    }
-
-    print(f'Results: {report}')
-    report_path = os.path.join(args.predictions_path, 'report.jsonl')
-    with open(report_path, 'w') as out_f:
-        out_f.write(json.dumps(report) + '\n')
-
-    logger.info(f'Saved report of results in swebench format to {report_path}')
--- a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py
+++ b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py
@@ -1,406 +0,0 @@
-"""Implements inference on JetBrains CI builds repair baselines
-
-Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
-and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
-
-TODOs:
- Add EXP_NAME
-"""
-
-import asyncio
-import json
-import os
-from typing import Any
-
-import pandas as pd
-import ruamel.yaml
-from datasets import load_dataset
-
-from evaluation.utils.shared import (
-    EvalMetadata,
-    EvalOutput,
-    codeact_user_response,
-    compatibility_for_eval_history_pairs,
-    get_default_sandbox_config_for_eval,
-    make_metadata,
-    prepare_dataset,
-    reset_logger_for_multiprocessing,
-    run_evaluation,
-)
-from openhands.controller.state.state import State
-from openhands.core.config import (
-    AppConfig,
-    get_llm_config_arg,
-    get_parser,
-    load_app_config,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.main import create_runtime, run_controller
-from openhands.events.action import CmdRunAction, MessageAction
-from openhands.events.observation import CmdOutputObservation
-from openhands.runtime.base import Runtime
-from openhands.utils.async_utils import call_async_from_sync
-
-
-def get_config(
-    metadata: EvalMetadata,
-) -> AppConfig:
-    sandbox_config = get_default_sandbox_config_for_eval()
-    sandbox_config.base_container_image = 'python:3.12-bookworm'
-    config = AppConfig(
-        default_agent=metadata.agent_class,
-        run_as_openhands=False,
-        runtime='docker',
-        max_iterations=metadata.max_iterations,
-        sandbox=sandbox_config,
-        # do not mount workspace
-        workspace_base=None,
-        workspace_mount_path=None,
-    )
-    config.set_llm_config(metadata.llm_config)
-    agent_config = config.get_agent_config(metadata.agent_class)
-    agent_config.enable_prompt_extensions = False
-    return config
-
-
-config = load_app_config()
-
-
-def load_bench_config():
-    script_dir = os.path.dirname(
-        os.path.abspath(__file__)
-    )  # Get the absolute path of the script
-    config_path = os.path.join(script_dir, 'config.yaml')
-    yaml = ruamel.yaml.YAML(typ='rt')
-    with open(config_path, 'r') as file:
-        return yaml.load(file)
-
-
-bench_config = load_bench_config()
-
-AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
-    'CodeActAgent': codeact_user_response,
-}
-
-AGENT_CLS_TO_INST_SUFFIX = {
-    'CodeActAgent': 'When you think you have completed the task, please finish the interaction using the "finish" tool.\n'
-}
-
-
-def initialize_runtime(
-    runtime: Runtime,
-    instance: pd.Series,
-):
-    """Initialize the runtime for the agent.
-
-    This function is called before the runtime is used to run the agent.
-    """
-    logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
-    obs: CmdOutputObservation
-
-    lca_path = bench_config['LCA_PATH']
-    lca_ci_path = os.path.join(
-        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
-    )
-
-    repo_name = instance['repo_name']
-    repos_path = bench_config['repos_folder']
-    repo_owner = instance['repo_owner']
-    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
-    model_name = bench_config['model_name']
-
-    action = CmdRunAction(command=f'mkdir {lca_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command=f'cd {lca_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
-    action = CmdRunAction(command=f'git clone {lca_repo_url}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command=f'cd {lca_ci_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command='git switch open-hands-integration')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    script_dir = os.path.dirname(
-        os.path.abspath(__file__)
-    )  # Get the absolute path of the script
-    config_path = os.path.join(script_dir, 'config.yaml')
-    with open(config_path, 'r') as file:
-        config_as_text = file.read()
-
-    commandf = f"echo '{config_as_text}' > config.yaml"
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    token_gh = bench_config['token_gh']
-    commandf = f'export TOKEN_GH={token_gh}'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    action = CmdRunAction(command='poetry install')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    # Set up the task environment
-    commandf = f'poetry run python run_get_datapoint.py --model-name {model_name} --id {instance["id"]} > branch_name.txt'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    if obs.exit_code != 0:
-        print(f'run_get_datapoint.py failed at {instance["id"]} with {obs.content}')
-    assert obs.exit_code == 0
-
-    commandf = 'cat branch_name.txt'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    bench_config['user_branch_name'] = obs.content
-
-    # Navigate to the task's code path
-    action = CmdRunAction(command=f'cd {repo_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
-
-
-def complete_runtime(
-    runtime: Runtime,
-    instance: pd.Series,
-) -> dict[str, Any]:
-    """Complete the runtime for the agent.
-
-    This function is called before the runtime is used to run the agent.
-    If you need to do something in the sandbox to get the correctness metric after
-    the agent has run, modify this function.
-    """
-    logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
-    obs: CmdOutputObservation
-
-    model_name = bench_config['model_name']
-
-    lca_path = bench_config['LCA_PATH']
-    lca_ci_path = os.path.join(
-        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
-    )
-
-    user_branch_name = bench_config['user_branch_name']
-
-    token_gh = bench_config['token_gh']
-    commandf = f'export TOKEN_GH={token_gh}'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-
-    # Navigate to the lca-baseslines scripts path
-    action = CmdRunAction(command=f'cd {lca_ci_path}')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert obs.exit_code == 0
-
-    commandf = f'poetry run python run_push_datapoint.py --id {instance["id"]} --model-name {model_name} --user-branch-name {user_branch_name} > single_output.json'
-    logger.info(f'Running push script: {commandf}')
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    # assert obs.exit_code == 0
-
-    commandf = 'cat single_output.json'
-    action = CmdRunAction(command=commandf)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    result = json.loads(obs.content)
-
-    logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
-
-    return result
-
-
-def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
-    config = get_config(metadata)
-
-    # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
-    if reset_logger:
-        log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
-        reset_logger_for_multiprocessing(logger, instance['instance_id'], log_dir)
-    else:
-        logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
-
-    repo_name = instance['repo_name']
-    repo_workflow = instance['workflow_path']
-    repo_logs = instance['logs']
-    repos_path = bench_config['repos_folder']
-    repo_owner = instance['repo_owner']
-    repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
-
-    # Prepare the task instruction
-    instruction_no_oracle = f"""
-<uploaded_files>
-{repo_path}
-</uploaded_files>
-
-I've uploaded a python code repository in the directory {repo_path}, Consider the following issue:
-
-<issue_description>
-The repository must pass the CI workflow {repo_workflow}.
-but it gave the following error
-{repo_logs}
-</issue_description>
-
-Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
-I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
-Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
-Your task is to make the minimal changes to non-test files in the {repo_path} directory to ensure the <issue_description> is satisfied.
-
-Follow these phases to resolve the issue:
-
-Phase 1. READING: read the problem and reword it in clearer terms
-   1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
-   1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
-   1.3 Explain the problem in clear terms.
-   1.4 Enumerate the steps to reproduce the problem.
-   1.5 Hightlight any best practices to take into account when testing and fixing the issue
-
-Phase 2. RUNNING: install and run the tests on the repository
-   2.1 Follow the readme
-   2.2 Install the environment and anything needed
-   2.2 Iterate and figure out how to run the tests
-
-Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
-   3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
-   3.2 Identify all files related to the problem statement.
-   3.3 Propose the methods and files to fix the issue and explain why.
-   3.4 From the possible file locations, select the most likely location to fix the issue.
-
-Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
-   4.1 Look at existing test files in the repository to understand the test format/structure.
-   4.2 Create a minimal reproduction script that reproduces the located issue.
-   4.3 Run the reproduction script to confirm you are reproducing the issue.
-   4.4 Adjust the reproduction script as necessary.
-
-Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
-   5.1 State clearly what the problem is.
-   5.2 State clearly where the problem is located.
-   5.3 State clearly how the test reproduces the issue.
-   5.4 State clearly the best practices to take into account in the fix.
-   5.5 State clearly how to fix the problem.
-
-Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
-   6.1 Make minimal, focused changes to fix the issue.
-
-Phase 7. VERIFICATION: Test your implementation thoroughly.
-   7.1 Run your reproduction script to verify the fix works.
-   7.2 Add edge cases to your test script to ensure comprehensive coverage.
-   7.3 Run existing tests related to the modified code to ensure you haven't broken anything. Run any tests in the repository related to:
-     7.2.1 The issue you are fixing
-     7.2.2 The files you modified
-     7.2.3 The functions you changed
-   7.4 If any tests fail, revise your implementation until all tests pass
-
-Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
-   8.1 Ensure you've fully addressed all requirements.
-
-Once all phases are done, announce: 'Agent Task Complete'.
-Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
-"""
-    runtime = create_runtime(config)
-    call_async_from_sync(runtime.connect)
-    initialize_runtime(runtime, instance)
-
-    # Run the agent
-    state: State | None = asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=MessageAction(content=instruction_no_oracle),
-            runtime=runtime,
-            fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
-                metadata.agent_class
-            ),
-        )
-    )
-    assert state is not None
-    metrics = state.metrics.get() if state.metrics else {}
-
-    test_result = complete_runtime(runtime, instance)
-
-    # history is now available as a stream of events, rather than list of pairs of (Action, Observation)
-    # for compatibility with the existing output format, we can remake the pairs here
-    # remove when it becomes unnecessary
-    histories = compatibility_for_eval_history_pairs(state.history)
-
-    # Save the output
-    output = EvalOutput(
-        instance_id=instance['instance_id'],
-        # instance=instance.to_dict(orient='recorods'),
-        instruction=instruction_no_oracle,
-        metadata=metadata,
-        history=histories,
-        test_result=test_result,
-        metrics=metrics,
-    )
-    return output
-
-
-if __name__ == '__main__':
-    parser = get_parser()
-    parser.add_argument(
-        '-s',
-        '--eval-split',
-        type=str,
-        default='test',
-        choices=['test'],
-        help='data split to evaluate on, must be test',
-    )
-    args, _ = parser.parse_known_args()
-
-    data_split = args.eval_split
-
-    bench = load_dataset(
-        'JetBrains-Research/lca-ci-builds-repair', split=data_split
-    ).to_pandas()
-    # todo: see why 126 is giving problems on inference
-    # todo: see why 145 is giving problems on eval
-    bench = bench[bench['id'] != 126]
-    bench = bench[bench['id'] != 145]
-    # bench = bench.iloc[0:56]
-    # add column instnace_id for compatibility with oh repo, old id column must be kept for lca repo
-    bench['instance_id'] = bench['id'].astype(str)
-
-    llm_config = None
-    if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
-        # modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
-        llm_config.modify_params = False
-    if llm_config is None:
-        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
-
-    metadata = make_metadata(
-        llm_config,
-        f'jetbrains-lca-ci--{data_split}',
-        args.agent_cls,
-        args.max_iterations,
-        args.eval_note,
-        args.eval_output_dir,
-    )
-    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
-    instances = prepare_dataset(bench, output_file, args.eval_n_limit)
-
-    run_evaluation(
-        instances, metadata, output_file, args.eval_num_workers, process_instance
-    )
--- a/evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-set -eo pipefail
-
-source "evaluation/utils/version_control.sh"
-
-PROCESS_FILEPATH=$1
-if [ -z "$PROCESS_FILEPATH" ]; then
-    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
-    exit 1
-fi
-
-get_openhands_version
-
-PROCESS_FILEPATH=$(realpath $PROCESS_FILEPATH)
-echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
-echo "PROCESS_FILEPATH: $PROCESS_FILEPATH"
-
-EVAL_NOTE="$OPENHANDS_VERSION"
-if [ -n "$EXP_NAME" ]; then
-  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
-fi
-
-function run_eval() {
-  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
-    --predictions-path $PROCESS_FILEPATH "
-
-  echo "RUNNING: $COMMAND"
-  # Run the command
-  eval $COMMAND
-}
-
-unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
-run_eval
--- a/evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
+++ b/evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-set -eo pipefail
-
-source "evaluation/utils/version_control.sh"
-
-MODEL_CONFIG=$1
-
-get_openhands_version
-
-echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
-echo "MODEL_CONFIG: $MODEL_CONFIG"
-
-EVAL_NOTE="$OPENHANDS_VERSION"
-if [ -n "$EXP_NAME" ]; then
-  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
-fi
-
-function run_eval() {
-  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
-    --llm-config $MODEL_CONFIG "
-
-  # Run the command
-  eval $COMMAND
-}
-
-#unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
-run_eval
--- a/evaluation/benchmarks/lca_ci_build_repair/setup.py
+++ b/evaluation/benchmarks/lca_ci_build_repair/setup.py
@@ -1,60 +0,0 @@
-"""Installs LCA CI Build Repair benchmark with scripts for OH integration."""
-
-import os
-import shutil
-import subprocess
-
-import yaml
-
-
-def setup():
-    # Read config.yaml
-    print('Reading config.yaml')
-    script_dir = os.path.dirname(
-        os.path.abspath(__file__)
-    )  # Get the absolute path of the script
-    config_path = os.path.join(script_dir, 'config.yaml')
-    with open(config_path, 'r') as f:
-        config = yaml.safe_load(f)
-
-    lca_path = config['LCA_PATH']
-    lca_ci_path = os.path.join(
-        lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
-    )
-    repo_url = 'https://github.com/juanmichelini/lca-baselines'
-
-    # Clone the repository to LCA_CI_PATH
-    print(f'Cloning lca-baselines repository from {repo_url} into {lca_path}')
-    result = subprocess.run(
-        ['git', 'clone', repo_url], cwd=lca_path, capture_output=True, text=True
-    )
-    if result.returncode != 0:
-        print(f'Warning cloning repository: {result.stderr}')
-
-    # Clone the repository to LCA_CI_PATH
-    print('Switching branches')
-    result = subprocess.run(
-        ['git', 'switch', 'open-hands-integration'],
-        cwd=lca_ci_path,
-        capture_output=True,
-        text=True,
-    )
-    if result.returncode != 0:
-        print(f'Warning switching repository: {result.stderr}')
-
-    # Move and rename config_lca.yaml (overwrite if exists)
-    lca_ci_config_path = os.path.join(lca_ci_path, 'config.yaml')
-    print(f'Copying config.yaml to {lca_ci_config_path}')
-    shutil.copy(config_path, lca_ci_config_path)
-
-    # Run poetry install in LCA_CI_PATH
-    print(f"Running 'poetry install' in {lca_ci_path}")
-    result = subprocess.run(
-        ['poetry', 'install'], cwd=lca_ci_path, capture_output=True, text=True
-    )
-    if result.returncode != 0:
-        print(f'Warning during poetry install: {result.stderr}')
-
-
-if __name__ == '__main__':
-    setup()
--- a/evaluation/benchmarks/logic_reasoning/run_infer.py
+++ b/evaluation/benchmarks/logic_reasoning/run_infer.py
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -63,6 +64,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/mint/run_infer.py
+++ b/evaluation/benchmarks/mint/run_infer.py
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -121,6 +122,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/ml_bench/run_infer.py
+++ b/evaluation/benchmarks/ml_bench/run_infer.py
@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -91,6 +92,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/swe_bench/README.md
+++ b/evaluation/benchmarks/swe_bench/README.md
@@ -2,8 +2,6 @@

 This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).

-**UPDATE (4/8/2025): We now support running SWT-Bench evaluation! For more details, checkout [the corresponding section](#SWT-Bench-Evaluation).**
-
 **UPDATE (03/27/2025): We now support SWE-Bench multimodal evaluation! Simply use "princeton-nlp/SWE-bench_Multimodal" as the dataset name in the `run_infer.sh` script to evaluate on multimodal instances.**

 **UPDATE (2/18/2025): We now support running SWE-Gym using the same evaluation harness here. For more details, checkout [this README](./SWE-Gym.md).**
@@ -143,7 +141,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
 ./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id] [dataset_name] [split]

 # Example
-./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
+./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
 ```

 The script now accepts optional arguments:
@@ -184,58 +182,3 @@ To clean-up all existing runtimes that you've already started, run:
 ```bash
 ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
 ```
-
-## SWT-Bench Evaluation
-
-[SWT-Bench](https://swtbench.com/) ([paper](https://arxiv.org/abs/2406.12952)) is a benchmark for evaluating the capability of LLMs at creating unit tests. It is performed on the same instances as SWE-Bench, but requires a separate evaluation harness to capture coverage and issue reproduction. We therefore detail below how to leverage the inference script in this folder to run inference on SWT-Bench and how to use the SWT-Bench evaluation harness to evaluate them.
-
-### Run inference on SWT-Bench
-
-To run inference on SWT-Bench, you can use the same `run_infer.sh` script as described for evaluation on plain SWE-Bench. The only differences is that you need to specify the `mode` parameter to `swt` or `swt-ci` when running the script. For example, to run inference on SWT-Bench Verified, run the following command:
-
-```bash
-./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [swe-dataset] test 1 swt
-
-# Example - This runs evaluation on CodeActAgent for 500 instances on "SWT-bench_Verified"'s test set (corresponding to SWE-bench_Verified), with max 100 iteration per instances, with 1 number of workers running in parallel
-./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4o-2024-11-20 HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test 1 swt
-```
-
-The two modes `swt` and `swt-ci` have the following effect:
- `swt`: This mode will change the prompt to instruct the agent to generate reproducing test cases instead of resolving the issue.
- `swt-ci`: In addition to the changes by `swt`, this mode sets up the CI environment by i) pre-installing the environment in the docker image, such that the test framework can be executed without errors and ii) telling the model the exact command to run the test framework.
-
-### Run evaluation for SWT-bench
-
-The evaluation of these results is done leveraging [the SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master).
-
-#### Extracting results into SWT-Bench harness format
-In order to run evaluation of the obtained inference results in the SWT-Bench harness, we transform the results to a format that the SWT-Bench evaluation harness expects.
-
-```bash
-python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file [output.jsonl] > [output_swt.jsonl]
-
-# Example
-python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file "evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Verified-test/CodeActAgent/gpt-4o-2024-11-20_maxiter_100_N_v0.31.0-no-hint-swt-run_1/output.jsonl" > OpenHands-gpt-4o-2024-11-20.jsonl
-```
-
-#### Running the results in SWT-Bench
-
-Next, we run the [SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master) with these results.
-First set-up and validate the setup as described in the harness [here](https://github.com/logic-star-ai/swt-bench/tree/master?tab=readme-ov-file#-set-up).
-Then, run the evaluation with the following command:
-
-```bash
-# Example
-python3 -m src.main \
-    --dataset_name princeton-nlp/SWE-bench_Verified \
-    --predictions_path <pathTo>/OpenHands-gpt-4o-2024-11-20.jsonl \
-    --max_workers 12 \
-    --run_id OpenHands-CodeAct-gpt-4o-2024-11-20  --patch_types vanilla  --build_mode api
-```
-
-The results of the evaluation can be obtained by running the reporting script of the harness.
-
-```bash
-# Example
-python -m src.report run_instance_swt_logs/OpenHands-CodeAct-gpt-4o-2024-11-20/OpenHands__CodeActAgent__gpt-4o-2024-11-20 --dataset verified
-```
--- a/evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
+++ b/evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
@@ -1,842 +0,0 @@
-# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
-
-# Constants - Installation Specifications
-MAP_VERSION_TO_INSTALL_SKLEARN = {
-    k: {
-        'python': '3.6',
-        'packages': 'numpy scipy cython pytest pandas matplotlib',
-        'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
-        'pip_packages': [
-            'cython',
-            'numpy==1.19.2',
-            'setuptools',
-            'scipy==1.5.2',
-        ],
-    }
-    for k in ['0.20', '0.21', '0.22']
-}
-MAP_VERSION_TO_INSTALL_SKLEARN.update(
-    {
-        k: {
-            'python': '3.9',
-            'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
-            'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
-            'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
-        }
-        for k in ['1.3', '1.4']
-    }
-)
-MAP_VERSION_TO_INSTALL_FLASK = {
-    '2.0': {
-        'python': '3.9',
-        'packages': 'requirements.txt',
-        'install': 'python -m pip install -e .',
-        'pip_packages': [
-            'setuptools==70.0.0',
-            'Werkzeug==2.3.7',
-            'Jinja2==3.0.1',
-            'itsdangerous==2.1.2',
-            'click==8.0.1',
-            'MarkupSafe==2.1.3',
-        ],
-    },
-    '2.1': {
-        'python': '3.10',
-        'packages': 'requirements.txt',
-        'install': 'python -m pip install -e .',
-        'pip_packages': [
-            'click==8.1.3',
-            'itsdangerous==2.1.2',
-            'Jinja2==3.1.2',
-            'MarkupSafe==2.1.1',
-            'Werkzeug==2.3.7',
-        ],
-    },
-}
-MAP_VERSION_TO_INSTALL_FLASK.update(
-    {
-        k: {
-            'python': '3.11',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'pip_packages': [
-                'click==8.1.3',
-                'itsdangerous==2.1.2',
-                'Jinja2==3.1.2',
-                'MarkupSafe==2.1.1',
-                'Werkzeug==2.3.7',
-            ],
-        }
-        for k in ['2.2', '2.3']
-    }
-)
-MAP_VERSION_TO_INSTALL_DJANGO = {
-    k: {
-        'python': '3.5',
-        'packages': 'requirements.txt',
-        'pre_install': [
-            'apt-get update && apt-get install -y locales',
-            "echo 'en_US UTF-8' > /etc/locale.gen",
-            'locale-gen en_US.UTF-8',
-        ],
-        'install': 'python setup.py install',
-        'pip_packages': ['setuptools'],
-        'eval_commands': [
-            'export LANG=en_US.UTF-8',
-            'export LC_ALL=en_US.UTF-8',
-            'export PYTHONIOENCODING=utf8',
-            'export LANGUAGE=en_US:en',
-        ],
-    }
-    for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
-}
-MAP_VERSION_TO_INSTALL_DJANGO.update(
-    {
-        k: {'python': '3.5', 'install': 'python setup.py install'}
-        for k in ['1.4', '1.5', '1.6']
-    }
-)
-MAP_VERSION_TO_INSTALL_DJANGO.update(
-    {
-        k: {
-            'python': '3.6',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'eval_commands': [
-                "sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
-                'export LANG=en_US.UTF-8',
-                'export LANGUAGE=en_US:en',
-                'export LC_ALL=en_US.UTF-8',
-            ],
-        }
-        for k in ['3.0', '3.1', '3.2']
-    }
-)
-MAP_VERSION_TO_INSTALL_DJANGO.update(
-    {
-        k: {
-            'python': '3.8',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-        }
-        for k in ['4.0']
-    }
-)
-MAP_VERSION_TO_INSTALL_DJANGO.update(
-    {
-        k: {
-            'python': '3.9',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-        }
-        for k in ['4.1', '4.2']
-    }
-)
-MAP_VERSION_TO_INSTALL_DJANGO.update(
-    {
-        k: {
-            'python': '3.11',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-        }
-        for k in ['5.0']
-    }
-)
-MAP_VERSION_TO_INSTALL_REQUESTS = {
-    k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'}
-    for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2']
-    + ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17']
-    + ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0']
-}
-MAP_VERSION_TO_INSTALL_SEABORN = {
-    k: {
-        'python': '3.9',
-        'install': 'python -m pip install -e .',
-        'pip_packages': [
-            'contourpy==1.1.0',
-            'cycler==0.11.0',
-            'fonttools==4.42.1',
-            'importlib-resources==6.0.1',
-            'kiwisolver==1.4.5',
-            'matplotlib==3.7.2',
-            'numpy==1.25.2',
-            'packaging==23.1',
-            'pandas==1.3.5',  # 2.0.3
-            'pillow==10.0.0',
-            'pyparsing==3.0.9',
-            'pytest',
-            'python-dateutil==2.8.2',
-            'pytz==2023.3.post1',
-            'scipy==1.11.2',
-            'six==1.16.0',
-            'tzdata==2023.1',
-            'zipp==3.16.2',
-        ],
-    }
-    for k in ['0.11']
-}
-MAP_VERSION_TO_INSTALL_SEABORN.update(
-    {
-        k: {
-            'python': '3.9',
-            'install': 'python -m pip install -e .[dev]',
-            'pip_packages': [
-                'contourpy==1.1.0',
-                'cycler==0.11.0',
-                'fonttools==4.42.1',
-                'importlib-resources==6.0.1',
-                'kiwisolver==1.4.5',
-                'matplotlib==3.7.2',
-                'numpy==1.25.2',
-                'packaging==23.1',
-                'pandas==2.0.0',
-                'pillow==10.0.0',
-                'pyparsing==3.0.9',
-                'pytest',
-                'python-dateutil==2.8.2',
-                'pytz==2023.3.post1',
-                'scipy==1.11.2',
-                'six==1.16.0',
-                'tzdata==2023.1',
-                'zipp==3.16.2',
-            ],
-        }
-        for k in ['0.12', '0.13']
-    }
-)
-MAP_VERSION_TO_INSTALL_PYTEST = {
-    k: {'python': '3.9', 'install': 'python -m pip install -e .'}
-    for k in [
-        '4.4',
-        '4.5',
-        '4.6',
-        '5.0',
-        '5.1',
-        '5.2',
-        '5.3',
-        '5.4',
-        '6.0',
-        '6.2',
-        '6.3',
-        '7.0',
-        '7.1',
-        '7.2',
-        '7.4',
-        '8.0',
-    ]
-}
-MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
-    'atomicwrites==1.4.1',
-    'attrs==23.1.0',
-    'more-itertools==10.1.0',
-    'pluggy==0.13.1',
-    'py==1.11.0',
-    'setuptools==68.0.0',
-    'six==1.16.0',
-]
-MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
-    'atomicwrites==1.4.1',
-    'attrs==23.1.0',
-    'more-itertools==10.1.0',
-    'pluggy==0.11.0',
-    'py==1.11.0',
-    'setuptools==68.0.0',
-    'six==1.16.0',
-    'wcwidth==0.2.6',
-]
-MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
-    'atomicwrites==1.4.1',
-    'attrs==23.1.0',
-    'more-itertools==10.1.0',
-    'packaging==23.1',
-    'pluggy==0.13.1',
-    'py==1.11.0',
-    'six==1.16.0',
-    'wcwidth==0.2.6',
-]
-for k in ['5.0', '5.1', '5.2']:
-    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
-        'atomicwrites==1.4.1',
-        'attrs==23.1.0',
-        'more-itertools==10.1.0',
-        'packaging==23.1',
-        'pluggy==0.13.1',
-        'py==1.11.0',
-        'wcwidth==0.2.6',
-    ]
-MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
-    'attrs==23.1.0',
-    'more-itertools==10.1.0',
-    'packaging==23.1',
-    'pluggy==0.13.1',
-    'py==1.11.0',
-    'wcwidth==0.2.6',
-]
-MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
-    'py==1.11.0',
-    'packaging==23.1',
-    'attrs==23.1.0',
-    'more-itertools==10.1.0',
-    'pluggy==0.13.1',
-]
-MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
-    'attrs==23.1.0',
-    'iniconfig==2.0.0',
-    'more-itertools==10.1.0',
-    'packaging==23.1',
-    'pluggy==0.13.1',
-    'py==1.11.0',
-    'toml==0.10.2',
-]
-for k in ['6.2', '6.3']:
-    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
-        'attrs==23.1.0',
-        'iniconfig==2.0.0',
-        'packaging==23.1',
-        'pluggy==0.13.1',
-        'py==1.11.0',
-        'toml==0.10.2',
-    ]
-MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
-    'attrs==23.1.0',
-    'iniconfig==2.0.0',
-    'packaging==23.1',
-    'pluggy==0.13.1',
-    'py==1.11.0',
-]
-for k in ['7.1', '7.2']:
-    MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
-        'attrs==23.1.0',
-        'iniconfig==2.0.0',
-        'packaging==23.1',
-        'pluggy==0.13.1',
-        'py==1.11.0',
-        'tomli==2.0.1',
-    ]
-MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
-    'iniconfig==2.0.0',
-    'packaging==23.1',
-    'pluggy==1.3.0',
-    'exceptiongroup==1.1.3',
-    'tomli==2.0.1',
-]
-MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
-    'iniconfig==2.0.0',
-    'packaging==23.1',
-    'pluggy==1.3.0',
-    'exceptiongroup==1.1.3',
-    'tomli==2.0.1',
-]
-MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
-    k: {
-        'python': '3.11',
-        'packages': 'environment.yml',
-        'install': 'python -m pip install -e .',
-        'pre_install': [
-            'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
-        ],
-        'pip_packages': [
-            'contourpy==1.1.0',
-            'cycler==0.11.0',
-            'fonttools==4.42.1',
-            'ghostscript',
-            'kiwisolver==1.4.5',
-            'numpy==1.25.2',
-            'packaging==23.1',
-            'pillow==10.0.0',
-            'pikepdf',
-            'pyparsing==3.0.9',
-            'python-dateutil==2.8.2',
-            'six==1.16.0',
-            'setuptools==68.1.2',
-            'setuptools-scm==7.1.0',
-            'typing-extensions==4.7.1',
-        ],
-    }
-    for k in ['3.5', '3.6', '3.7']
-}
-MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
-    {
-        k: {
-            'python': '3.8',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'pre_install': [
-                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
-            ],
-            'pip_packages': ['pytest', 'ipython'],
-        }
-        for k in ['3.1', '3.2', '3.3', '3.4']
-    }
-)
-MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
-    {
-        k: {
-            'python': '3.7',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'pre_install': [
-                'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
-            ],
-            'pip_packages': ['pytest'],
-        }
-        for k in ['3.0']
-    }
-)
-MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
-    {
-        k: {
-            'python': '3.5',
-            'install': 'python setup.py build; python setup.py install',
-            'pre_install': [
-                'apt-get -y update && apt-get -y upgrade && && apt-get install -y imagemagick ffmpeg'
-            ],
-            'pip_packages': ['pytest'],
-            'execute_test_as_nonroot': True,
-        }
-        for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
-    }
-)
-MAP_VERSION_TO_INSTALL_SPHINX = {
-    k: {
-        'python': '3.9',
-        'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
-        'install': 'python -m pip install -e .[test]',
-        'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
-    }
-    for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
-    + ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
-    + ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
-}
-for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
-    MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
-        [
-            "sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
-            "sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
-            "sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
-            "sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
-            "sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
-            "sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
-        ]
-    )
-    if k in ['4.2', '4.3', '4.4']:
-        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
-            [
-                "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
-                "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
-            ]
-        )
-    elif k == '4.1':
-        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
-            [
-                (
-                    "grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
-                    "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
-                    "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
-                ),
-                (
-                    "grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
-                    "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
-                    "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
-                ),
-            ]
-        )
-    else:
-        MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
-            [
-                "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
-                "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
-            ]
-        )
-MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
-    'apt-get update && apt-get install -y graphviz'
-]
-MAP_VERSION_TO_INSTALL_ASTROPY = {
-    k: {
-        'python': '3.9',
-        'install': 'python -m pip install -e .[test] --verbose',
-        'pip_packages': [
-            'attrs==23.1.0',
-            'exceptiongroup==1.1.3',
-            'execnet==2.0.2',
-            'hypothesis==6.82.6',
-            'iniconfig==2.0.0',
-            'numpy==1.25.2',
-            'packaging==23.1',
-            'pluggy==1.3.0',
-            'psutil==5.9.5',
-            'pyerfa==2.0.0.3',
-            'pytest-arraydiff==0.5.0',
-            'pytest-astropy-header==0.2.2',
-            'pytest-astropy==0.10.0',
-            'pytest-cov==4.1.0',
-            'pytest-doctestplus==1.0.0',
-            'pytest-filter-subpackage==0.1.2',
-            'pytest-mock==3.11.1',
-            'pytest-openfiles==0.5.0',
-            'pytest-remotedata==0.4.0',
-            'pytest-xdist==3.3.1',
-            'pytest==7.4.0',
-            'PyYAML==6.0.1',
-            'setuptools==68.0.0',
-            'sortedcontainers==2.4.0',
-            'tomli==2.0.1',
-        ],
-    }
-    for k in ['0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2']
-    + ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']
-}
-for k in ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']:
-    MAP_VERSION_TO_INSTALL_ASTROPY[k]['pre_install'] = [
-        'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
-    ]
-MAP_VERSION_TO_INSTALL_SYMPY = {
-    k: {
-        'python': '3.9',
-        'packages': 'mpmath flake8',
-        'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
-        'install': 'python -m pip install -e .',
-    }
-    for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6']
-    + ['1.7', '1.8', '1.9']
-}
-MAP_VERSION_TO_INSTALL_SYMPY.update(
-    {
-        k: {
-            'python': '3.9',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'pip_packages': ['mpmath==1.3.0'],
-        }
-        for k in ['1.13']
-    }
-)
-MAP_VERSION_TO_INSTALL_PYLINT = {
-    k: {
-        'python': '3.9',
-        'packages': 'requirements.txt',
-        'install': 'python -m pip install -e .',
-    }
-    for k in [
-        '2.10',
-        '2.11',
-        '2.13',
-        '2.14',
-        '2.15',
-        '2.16',
-        '2.17',
-        '2.8',
-        '2.9',
-        '3.0',
-    ]
-}
-MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pip_packages'] = ['pyenchant==3.2']
-MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pre_install'] = [
-    'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
-]
-MAP_VERSION_TO_INSTALL_PYLINT.update(
-    {
-        k: {
-            **MAP_VERSION_TO_INSTALL_PYLINT[k],
-            'pip_packages': ['astroid==3.0.0a6', 'setuptools'],
-        }
-        for k in ['3.0']
-    }
-)
-
-MAP_VERSION_TO_INSTALL_XARRAY = {
-    k: {
-        'python': '3.10',
-        'packages': 'environment.yml',
-        'install': 'python -m pip install -e .',
-        'pip_packages': [
-            'numpy==1.23.0',
-            'packaging==23.1',
-            'pandas==1.5.3',
-            'pytest==7.4.0',
-            'python-dateutil==2.8.2',
-            'pytz==2023.3',
-            'six==1.16.0',
-            'scipy==1.11.1',
-            'setuptools==68.0.0',
-        ],
-        'no_use_env': True,
-    }
-    for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09']
-}
-
-MAP_VERSION_TO_INSTALL_SQLFLUFF = {
-    k: {
-        'python': '3.9',
-        'packages': 'requirements.txt',
-        'install': 'python -m pip install -e .',
-    }
-    for k in [
-        '0.10',
-        '0.11',
-        '0.12',
-        '0.13',
-        '0.4',
-        '0.5',
-        '0.6',
-        '0.8',
-        '0.9',
-        '1.0',
-        '1.1',
-        '1.2',
-        '1.3',
-        '1.4',
-        '2.0',
-        '2.1',
-        '2.2',
-    ]
-}
-MAP_VERSION_TO_INSTALL_DBT_CORE = {
-    k: {
-        'python': '3.9',
-        'packages': 'requirements.txt',
-        'install': 'python -m pip install -e .',
-    }
-    for k in [
-        '0.13',
-        '0.14',
-        '0.15',
-        '0.16',
-        '0.17',
-        '0.18',
-        '0.19',
-        '0.20',
-        '0.21',
-        '1.0',
-        '1.1',
-        '1.2',
-        '1.3',
-        '1.4',
-        '1.5',
-        '1.6',
-        '1.7',
-    ]
-}
-MAP_VERSION_TO_INSTALL_PYVISTA = {
-    k: {
-        'python': '3.9',
-        'install': 'python -m pip install -e .',
-        'pip_packages': ['pytest'],
-    }
-    for k in ['0.20', '0.21', '0.22', '0.23']
-}
-MAP_VERSION_TO_INSTALL_PYVISTA.update(
-    {
-        k: {
-            'python': '3.9',
-            'packages': 'requirements.txt',
-            'install': 'python -m pip install -e .',
-            'pip_packages': ['pytest'],
-        }
-        for k in [
-            '0.24',
-            '0.25',
-            '0.26',
-            '0.27',
-            '0.28',
-            '0.29',
-            '0.30',
-            '0.31',
-            '0.32',
-            '0.33',
-            '0.34',
-            '0.35',
-            '0.36',
-            '0.37',
-            '0.38',
-            '0.39',
-            '0.40',
-            '0.41',
-            '0.42',
-            '0.43',
-        ]
-    }
-)
-MAP_VERSION_TO_INSTALL_ASTROID = {
-    k: {
-        'python': '3.9',
-        'install': 'python -m pip install -e .',
-        'pip_packages': ['pytest'],
-    }
-    for k in [
-        '2.10',
-        '2.12',
-        '2.13',
-        '2.14',
-        '2.15',
-        '2.16',
-        '2.5',
-        '2.6',
-        '2.7',
-        '2.8',
-        '2.9',
-        '3.0',
-    ]
-}
-MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
-    k: {
-        'python': '3.9',
-        'install': "python -m pip install -e '.[dev]'",
-    }
-    for k in [
-        '2.18',
-        '2.19',
-        '2.20',
-        '3.0',
-        '3.1',
-        '3.10',
-        '3.11',
-        '3.12',
-        '3.13',
-        '3.15',
-        '3.16',
-        '3.19',
-        '3.2',
-        '3.4',
-        '3.8',
-        '3.9',
-    ]
-}
-MAP_VERSION_TO_INSTALL_PVLIB = {
-    k: {
-        'python': '3.9',
-        'install': 'python -m pip install -e .[all]',
-        'packages': 'pandas scipy',
-        'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
-    }
-    for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
-}
-MAP_VERSION_TO_INSTALL_PYDICOM = {
-    k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
-    for k in [
-        '1.0',
-        '1.1',
-        '1.2',
-        '1.3',
-        '1.4',
-        '2.0',
-        '2.1',
-        '2.2',
-        '2.3',
-        '2.4',
-        '3.0',
-    ]
-}
-MAP_VERSION_TO_INSTALL_PYDICOM.update(
-    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.8'} for k in ['1.4', '2.0']}
-)
-MAP_VERSION_TO_INSTALL_PYDICOM.update(
-    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.9'} for k in ['2.1', '2.2']}
-)
-MAP_VERSION_TO_INSTALL_PYDICOM.update(
-    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.10'} for k in ['2.3']}
-)
-MAP_VERSION_TO_INSTALL_PYDICOM.update(
-    {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.11'} for k in ['2.4', '3.0']}
-)
-MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {'python': '3.9'} for k in ['1.0']}
-MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
-    k: {'python': '3.10', 'packages': 'pytest'} for k in ['0.0.1']
-}
-
-# Constants - Task Instance Instllation Environment
-MAP_VERSION_TO_INSTALL = {
-    'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
-    'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
-    'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
-    'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
-    'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
-    'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
-    'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
-    'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
-    'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
-    'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
-    'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
-    'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
-    'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
-    'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
-    'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
-    'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
-    'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
-    'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
-    'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
-    'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
-    'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
-}
-
-# Constants - Repository Specific Installation Instructions
-MAP_REPO_TO_INSTALL = {}
-
-# Constants - Task Instance Test Frameworks
-TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
-MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
-    'astropy/astropy': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
-    },
-    'django/django': {
-        k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
-        for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
-    },
-    'marshmallow-code/marshmallow': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
-    },
-    'matplotlib/matplotlib': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
-    },
-    'mwaskom/seaborn': {
-        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
-    },
-    'pallets/flask': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
-    },
-    'psf/requests': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
-    },
-    'pvlib/pvlib-python': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
-    },
-    'pydata/xarray': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
-    },
-    'pydicom/pydicom': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
-    },
-    'pylint-dev/astroid': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
-    },
-    'pylint-dev/pylint': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
-    },
-    'pytest-dev/pytest': {
-        k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
-    },
-    'pyvista/pyvista': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
-    },
-    'scikit-learn/scikit-learn': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
-    },
-    'sphinx-doc/sphinx': {
-        k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
-    },
-    'sqlfluff/sqlfluff': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
-    },
-    'swe-bench/humaneval': {
-        k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
-    },
-    'nielstron/humaneval_fix': {
-        k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
-    },
-    'sympy/sympy': {
-        k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
-    },
-}
-MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
-    './tests/runtests.py --verbosity 2'
-)
--- a/evaluation/benchmarks/swe_bench/run_infer.py
+++ b/evaluation/benchmarks/swe_bench/run_infer.py
@@ -3,7 +3,7 @@ import copy
 import json
 import os
 import tempfile
-from typing import Any, Literal
+from typing import Any

 import pandas as pd
 import toml
@@ -17,11 +17,6 @@ from evaluation.benchmarks.swe_bench.binary_patch_utils import (
 from evaluation.benchmarks.swe_bench.resource.mapping import (
    get_instance_resource_factor,
 )
-from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
-    MAP_REPO_TO_INSTALL,
-    MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE,
-    MAP_VERSION_TO_INSTALL,
-)
 from evaluation.utils.shared import (
    EvalException,
    EvalMetadata,
@@ -35,6 +30,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
    update_llm_config_for_completions_logging,
 )
 from openhands.controller.state.state import State
@@ -60,7 +56,6 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue

 USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
 RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
-BenchMode = Literal['swe', 'swt', 'swt-ci']


 AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
@@ -74,36 +69,7 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:

 def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
    workspace_dir_name = _get_swebench_workspace_dir_name(instance)
-    mode = metadata.details['mode']
-    if mode.startswith('swt'):
-        test_instructions = (
-            f'The following command can be used to run the tests: `{list(MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE[instance.repo].values())[0]}`. Make sure they fail in the expected way.\n'
-            if mode.endswith('ci')
-            else ''
-        )
-        instruction = f"""\
-<uploaded_files>
-/workspace/{workspace_dir_name}
-</uploaded_files>
-I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
-
-<issue_description>
-{instance.problem_statement}
-</issue_description>
-
-
-Can you help me implement the necessary changes to the repository to test whether the issue in <issue_description> was resolved?
-I will take care of all changes to any of the non-test files. This means you DON'T have to modify the actual logic and ONLY have to update test logic and tests!
-Your task is to make the minimal changes to tests files in the /workspace directory to reproduce the issue in the <issue_description>, i.e., such that the generated tests fail in the current state (where the issue is unresolved) and pass when the issue will be resolved.
-Follow these steps to reproduce the issue:
-1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
-2. Create a script `reproduction.py` to reproduce the error and execute it with `python reproduction.py` using the BashTool, to confirm the error
-3. Edit the sourcecode of the repo to integrate your reproduction script into the test framework
-4. Run the test framework and make sure your tests fail! Only submit FAILING tests! Never submit passing tests.
-{test_instructions}Your thinking should be thorough and so it's fine if it's very long.
-"""
-    else:
-        instruction = f"""
+    instruction = f"""
 <uploaded_files>
 /workspace/{workspace_dir_name}
 </uploaded_files>
@@ -266,6 +232,7 @@ def get_config(
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
+    agent_config = update_agent_config_for_eval(agent_config)
    config.set_agent_config(agent_config)
    return config

@@ -391,30 +358,6 @@ def initialize_runtime(
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')

-    if metadata.details['mode'] == 'swt-ci':
-        # set up repo
-        setup_commands = []
-        if instance['repo'] in MAP_REPO_TO_INSTALL:
-            setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
-
-        # Run pre-install set up if provided
-        install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
-            instance['version'], []
-        )
-        if 'pre_install' in install:
-            for pre_install in install['pre_install']:
-                setup_commands.append(pre_install)
-
-        if 'install' in install:
-            setup_commands.append(install['install'])
-
-        for command in setup_commands:
-            action = CmdRunAction(command=command)
-            action.set_hard_timeout(600)
-            logger.info(action, extra={'msg_type': 'ACTION'})
-            obs = runtime.run_action(action)
-            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
    if 'multimodal' not in metadata.dataset.lower():
        # Only for non-multimodal datasets, we need to activate the testbed environment for Python
        # SWE-Bench multimodal datasets are not using the testbed environment
@@ -737,13 +680,6 @@ if __name__ == '__main__':
        default='test',
        help='split to evaluate on',
    )
-    parser.add_argument(
-        '--mode',
-        type=str,
-        default='swe',
-        choices=['swe', 'swt', 'swt-ci'],
-        help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
-    )
    args, _ = parser.parse_known_args()

    # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
@@ -780,7 +716,7 @@ if __name__ == '__main__':
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

-    details = {'mode': args.mode}
+    details = {}
    _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)

    dataset_descrption = (
@@ -930,7 +866,7 @@ if __name__ == '__main__':
                    # Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else)
                    if (
                        instance['instance_id'] not in added_instance_ids
-                        and instance['test_result'].get('git_patch', '').strip()
+                        and (instance['test_result'].get('git_patch') or '').strip()
                    ):
                        fout.write(line)
                        added_instance_ids.add(instance['instance_id'])
--- a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
@@ -12,7 +12,6 @@ NUM_WORKERS=$6
 DATASET=$7
 SPLIT=$8
 N_RUNS=$9
-MODE=${10}

 if [ -z "$NUM_WORKERS" ]; then
  NUM_WORKERS=1
@@ -46,11 +45,6 @@ if [ -z "$SPLIT" ]; then
  SPLIT="test"
 fi

-if [ -z "$MODE" ]; then
-  MODE="swe"
-  echo "MODE not specified, use default $MODE"
-fi
-
 export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
 echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"

@@ -61,10 +55,6 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "DATASET: $DATASET"
 echo "SPLIT: $SPLIT"
-echo "MAX_ITER: $MAX_ITER"
-echo "NUM_WORKERS: $NUM_WORKERS"
-echo "COMMIT_HASH: $COMMIT_HASH"
-echo "MODE: $MODE"

 # Default to NOT use Hint
 if [ -z "$USE_HINT_TEXT" ]; then
@@ -84,13 +74,9 @@ fi
 if [ -n "$EXP_NAME" ]; then
  EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
 fi
-# if mode != swe, add mode to the eval note
-if [ "$MODE" != "swe" ]; then
-  EVAL_NOTE="${EVAL_NOTE}-${MODE}"
-fi

 function run_eval() {
-  local eval_note="${1}"
+  local eval_note=$1
  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
@@ -98,8 +84,7 @@ function run_eval() {
    --eval-num-workers $NUM_WORKERS \
    --eval-note $eval_note \
    --dataset $DATASET \
-    --split $SPLIT \
-    --mode $MODE"
+    --split $SPLIT"

  if [ -n "$EVAL_LIMIT" ]; then
    echo "EVAL_LIMIT: $EVAL_LIMIT"
--- a/evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py
+++ b/evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py
@@ -1,95 +0,0 @@
-import argparse
-import json
-import logging
-
-import unidiff
-
-from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
-    MAP_VERSION_TO_INSTALL,
-)
-
-_LOGGER = logging.getLogger(__name__)
-
-
-def remove_setup_files(model_patch: str, instance: dict, delete_setup_changes: bool):
-    """Discard all changes that a patch applies to files changes by the pre_install script and that are reproduction scripts (top-level script)"""
-    setup_files = ['setup.py', 'tox.ini', 'pyproject.toml']
-    pre_install = (
-        MAP_VERSION_TO_INSTALL.get(instance['repo'], {})
-        .get(instance['version'], {})
-        .get('pre_install', [])
-    )
-    relevant_files = (
-        [
-            file
-            for file in setup_files
-            if any(file in install and 'sed' in install for install in pre_install)
-        ]
-        if delete_setup_changes
-        else []
-    )
-    for i in range(10):
-        try:
-            # Appearently outputs.jsonl has .strip() applied, so we try to reconstruct the original patch by adding auxiliary whitespace
-            patch = unidiff.PatchSet(model_patch + i * '\n')
-            break
-        except unidiff.UnidiffParseError:
-            pass
-
-    to_delete = []
-    for i, file in enumerate(patch):
-        if (
-            any(f in file.source_file for f in relevant_files)
-            or file.target_file.count('/') == 1
-        ):
-            to_delete.append(i)
-    for i in reversed(to_delete):
-        del patch[i]
-    return str(patch)
-
-
-def main(
-    prediction_file: str,
-):
-    """Main function to extract the model patches from the OpenHands prediction file and turn them into the expected SWT-Bench format."""
-    with open(prediction_file) as f:
-        for line in f:
-            pred = json.loads(line)
-            try:
-                git_diff = pred['test_result']['git_patch']
-            except KeyError:
-                _LOGGER.warning(
-                    'Warning: No git diff found for instance %s', pred['instance_id']
-                )
-                continue
-            ci_mode = pred['metadata']['details'].get('mode', '') == 'swt-ci'
-            try:
-                git_diff = remove_setup_files(git_diff, pred['instance'], ci_mode)
-            except:  # noqa: E722
-                _LOGGER.warning(
-                    'Warning: Invalid git diff found for instance %s',
-                    pred['instance_id'],
-                )
-            print(
-                json.dumps(
-                    {
-                        'instance_id': pred['instance_id'],
-                        'model_name_or_path': f'{pred["metadata"]["llm_config"]["openrouter_app_name"]}__{pred["metadata"]["agent_class"]}__{pred["metadata"]["llm_config"]["model"]}',
-                        'model_patch': git_diff,
-                        'full_output': json.dumps(pred),
-                    }
-                )
-            )
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--prediction_file',
-        type=str,
-        required=True,
-        help='Path to the prediction file (.../outputs.jsonl)',
-    )
-    args = parser.parse_args()
-
-    main(args.prediction_file)
--- a/evaluation/benchmarks/testgeneval/run_infer.py
+++ b/evaluation/benchmarks/testgeneval/run_infer.py
@@ -30,6 +30,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
    update_llm_config_for_completions_logging,
 )
 from openhands.controller.state.state import State
@@ -164,6 +165,7 @@ def get_config(
        condenser=metadata.condenser_config,
        enable_prompt_extensions=False,
    )
+    agent_config = update_agent_config_for_eval(agent_config)
    config.set_agent_config(agent_config)
    return config

--- a/evaluation/benchmarks/the_agent_company/run_infer.py
+++ b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -13,7 +13,10 @@ from typing import List
 import yaml
 from browsing import pre_login

-from evaluation.utils.shared import get_default_sandbox_config_for_eval
+from evaluation.utils.shared import (
+    get_default_sandbox_config_for_eval,
+    update_agent_config_for_eval,
+)
 from openhands.controller.state.state import State
 from openhands.core.config import (
    AppConfig,
@@ -58,12 +61,14 @@ def get_config(
    )
    config.set_llm_config(llm_config)
    if agent_config:
+        agent_config = update_agent_config_for_eval(agent_config)
        config.set_agent_config(agent_config)
    else:
        logger.info('Agent config not provided, using default settings')
        agent_config = AgentConfig(
            enable_prompt_extensions=False,
        )
+        agent_config = update_agent_config_for_eval(agent_config)
        config.set_agent_config(agent_config)
    return config

--- a/evaluation/benchmarks/toolqa/run_infer.py
+++ b/evaluation/benchmarks/toolqa/run_infer.py
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -55,6 +56,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/benchmarks/webarena/run_infer.py
+++ b/evaluation/benchmarks/webarena/run_infer.py
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
    prepare_dataset,
    reset_logger_for_multiprocessing,
    run_evaluation,
+    update_agent_config_for_eval,
 )
 from openhands.controller.state.state import State
 from openhands.core.config import (
@@ -76,6 +77,8 @@ def get_config(
    )
    config.set_llm_config(metadata.llm_config)
    agent_config = config.get_agent_config(metadata.agent_class)
+
+    agent_config = update_agent_config_for_eval(agent_config)
    agent_config.enable_prompt_extensions = False
    return config

--- a/evaluation/utils/shared.py
+++ b/evaluation/utils/shared.py
@@ -160,6 +160,26 @@ def cleanup():
        process.join()


+def update_agent_config_for_eval(
+    agent_config: AgentConfig | None = None,
+) -> AgentConfig:
+    """Update agent config with evaluation-specific settings.
+
+    Args:
+        agent_config: The agent config to update. If None, a new AgentConfig will be created.
+
+    Returns:
+        The updated agent config.
+    """
+    if agent_config is None:
+        agent_config = AgentConfig()
+
+    # Repository memory is intentionally left enabled, so the config is returned unchanged.
+    # Evaluation-specific overrides, if any are needed later, belong here.
+
+    return agent_config
+
+
 def make_metadata(
    llm_config: LLMConfig,
    dataset_name: str,
@@ -172,6 +192,8 @@ def make_metadata(
    agent_config: AgentConfig | None = None,
    condenser_config: CondenserConfig | None = None,
 ) -> EvalMetadata:
+    # Update agent config with evaluation-specific settings
+    agent_config = update_agent_config_for_eval(agent_config)
    model_name = llm_config.model.split('/')[-1]
    model_path = model_name.replace(':', '_').replace('@', '-')
    eval_note = f'_N_{eval_note}' if eval_note else ''
--- a/frontend/.husky/pre-commit
+++ b/frontend/.husky/pre-commit
@@ -1,10 +1,3 @@
-# Run frontend checks
-echo "Running frontend checks..."
 cd frontend
 npm run check-unlocalized-strings
 npx lint-staged
-
-# Run backend pre-commit
-echo "Running backend pre-commit..."
-cd ..
-pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml
--- a/frontend/tests/components/chat/chat-input.test.tsx
+++ b/frontend/tests/components/chat/chat-input.test.tsx
@@ -223,7 +223,7 @@ describe("ChatInput", () => {
    render(<ChatInput onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");
    expect(textarea).toBeInTheDocument();
-
+    
    // The actual verification of maxRows=16 is handled internally by the TextareaAutosize component
    // and affects how many rows the textarea can expand to
  });
--- a/frontend/tests/components/chat/chat-interface.test.tsx
+++ b/frontend/tests/components/chat/chat-interface.test.tsx
@@ -1,8 +1,8 @@
 import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
+import type { Message } from "#/message";
 import { act, screen, waitFor, within } from "@testing-library/react";
 import userEvent from "@testing-library/user-event";
 import { renderWithProviders } from "test-utils";
-import type { Message } from "#/message";
 import { addUserMessage } from "#/state/chat-slice";
 import { SUGGESTIONS } from "#/utils/suggestions";
 import * as ChatSlice from "#/state/chat-slice";
@@ -45,15 +45,7 @@ describe("Empty state", () => {
  it("should render suggestions if empty", () => {
    const { store } = renderWithProviders(<ChatInterface />, {
      preloadedState: {
-        chat: {
-          messages: [],
-          systemMessage: {
-            content: "",
-            tools: [],
-            openhands_version: null,
-            agent_class: null
-          }
-        },
+        chat: { messages: [] },
      },
    });

@@ -76,15 +68,7 @@ describe("Empty state", () => {
  it("should render the default suggestions", () => {
    renderWithProviders(<ChatInterface />, {
      preloadedState: {
-        chat: {
-          messages: [],
-          systemMessage: {
-            content: "",
-            tools: [],
-            openhands_version: null,
-            agent_class: null
-          }
-        },
+        chat: { messages: [] },
      },
    });

@@ -114,15 +98,7 @@ describe("Empty state", () => {
      const user = userEvent.setup();
      const { store } = renderWithProviders(<ChatInterface />, {
        preloadedState: {
-          chat: {
-            messages: [],
-            systemMessage: {
-              content: "",
-              tools: [],
-              openhands_version: null,
-              agent_class: null
-            }
-          },
+          chat: { messages: [] },
        },
      });

@@ -151,15 +127,7 @@ describe("Empty state", () => {
      const user = userEvent.setup();
      const { rerender } = renderWithProviders(<ChatInterface />, {
        preloadedState: {
-          chat: {
-            messages: [],
-            systemMessage: {
-              content: "",
-              tools: [],
-              openhands_version: null,
-              agent_class: null
-            }
-          },
+          chat: { messages: [] },
        },
      });

--- a/frontend/tests/components/chat/expandable-message.test.tsx
+++ b/frontend/tests/components/chat/expandable-message.test.tsx
@@ -95,23 +95,6 @@ describe("ExpandableMessage", () => {
    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
  });

-  it("should render with neutral border and no icon for action messages with undefined success (timeout case)", () => {
-    renderWithProviders(
-      <ExpandableMessage
-        id="OBSERVATION_MESSAGE$RUN"
-        message="Command timed out"
-        type="action"
-        success={undefined}
-      />,
-    );
-    const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
-    const container = element.closest(
-      "div.flex.gap-2.items-center.justify-start",
-    );
-    expect(container).toHaveClass("border-neutral-300");
-    expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
-  });
-
  it("should render the out of credits message when the user is out of credits", async () => {
    const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
    // @ts-expect-error - We only care about the APP_MODE and FEATURE_FLAGS fields
--- a/frontend/tests/components/features/auth-modal.test.tsx
+++ b/frontend/tests/components/features/auth-modal.test.tsx
@@ -3,46 +3,34 @@ import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
 import userEvent from "@testing-library/user-event";
 import { AuthModal } from "#/components/features/waitlist/auth-modal";
 import * as CaptureConsent from "#/utils/handle-capture-consent";
-import * as AuthHook from "#/context/auth-context";

 describe("AuthModal", () => {
  beforeAll(() => {
    vi.stubGlobal("location", { href: "" });
-    vi.spyOn(AuthHook, "useAuth").mockReturnValue({
-      providersAreSet: false,
-      setProvidersAreSet: vi.fn(),
-      providerTokensSet: [],
-      setProviderTokensSet: vi.fn()
-    });
  });

  afterAll(() => {
    vi.unstubAllGlobals();
-    vi.restoreAllMocks();
  });

  it("should render a tos checkbox that is unchecked by default", () => {
-    render(<AuthModal githubAuthUrl={null} appMode="saas" />);
+    render(<AuthModal githubAuthUrl={null} />);
    const checkbox = screen.getByRole("checkbox");

    expect(checkbox).not.toBeChecked();
  });

-  it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
+  it("should only enable the GitHub button if the tos checkbox is checked", async () => {
    const user = userEvent.setup();
-    render(<AuthModal githubAuthUrl={null} appMode="saas" />);
-
+    render(<AuthModal githubAuthUrl={null} />);
    const checkbox = screen.getByRole("checkbox");
-    const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
-    const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
+    const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });

-    expect(githubButton).toBeDisabled();
-    expect(gitlabButton).toBeDisabled();
+    expect(button).toBeDisabled();

    await user.click(checkbox);

-    expect(githubButton).not.toBeDisabled();
-    expect(gitlabButton).not.toBeDisabled();
+    expect(button).not.toBeDisabled();
  });

  it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
@@ -52,7 +40,7 @@ describe("AuthModal", () => {
    );

    const user = userEvent.setup();
-    render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
+    render(<AuthModal githubAuthUrl="mock-url" />);

    const checkbox = screen.getByRole("checkbox");
    await user.click(checkbox);
--- a/frontend/tests/components/features/git/git-repo-selector.test.tsx
+++ b/frontend/tests/components/features/git/git-repo-selector.test.tsx
@@ -56,16 +56,12 @@ describe("GitRepositorySelector", () => {
        full_name: "test/repo1",
        git_provider: "github" as Provider,
        stargazers_count: 100,
-        is_public: true,
-        pushed_at: "2023-01-01T00:00:00Z",
      },
      {
        id: 2,
        full_name: "test/repo2",
        git_provider: "github" as Provider,
        stargazers_count: 200,
-        is_public: true,
-        pushed_at: "2023-01-02T00:00:00Z",
      },
    ];

--- a/frontend/tests/components/features/home/home-header.test.tsx
+++ b/frontend/tests/components/features/home/home-header.test.tsx
@@ -1,70 +0,0 @@
-import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
-import { render, screen } from "@testing-library/react";
-import { Provider } from "react-redux";
-import { createRoutesStub } from "react-router";
-import { setupStore } from "test-utils";
-import { describe, expect, it, vi } from "vitest";
-import userEvent from "@testing-library/user-event";
-import { AuthProvider } from "#/context/auth-context";
-import { HomeHeader } from "#/components/features/home/home-header";
-import OpenHands from "#/api/open-hands";
-
-const renderHomeHeader = () => {
-  const RouterStub = createRoutesStub([
-    {
-      Component: HomeHeader,
-      path: "/",
-    },
-    {
-      Component: () => <div data-testid="conversation-screen" />,
-      path: "/conversations/:conversationId",
-    },
-  ]);
-
-  return render(<RouterStub />, {
-    wrapper: ({ children }) => (
-      <Provider store={setupStore()}>
-        <AuthProvider initialProvidersAreSet>
-          <QueryClientProvider client={new QueryClient()}>
-            {children}
-          </QueryClientProvider>
-        </AuthProvider>
-      </Provider>
-    ),
-  });
-};
-
-describe("HomeHeader", () => {
-  it("should create an empty conversation and redirect when pressing the launch from scratch button", async () => {
-    const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-    renderHomeHeader();
-
-    const launchButton = screen.getByRole("button", {
-      name: /launch from scratch/i,
-    });
-    await userEvent.click(launchButton);
-
-    expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
-      undefined,
-      undefined,
-      [],
-      undefined,
-    );
-
-    // expect to be redirected to /conversations/:conversationId
-    await screen.findByTestId("conversation-screen");
-  });
-
-  it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
-    renderHomeHeader();
-
-    const launchButton = screen.getByRole("button", {
-      name: /launch from scratch/i,
-    });
-    await userEvent.click(launchButton);
-
-    expect(launchButton).toHaveTextContent(/Loading/i);
-    expect(launchButton).toBeDisabled();
-  });
-});
--- a/frontend/tests/components/features/home/repo-connector.test.tsx
+++ b/frontend/tests/components/features/home/repo-connector.test.tsx
@@ -1,229 +0,0 @@
-import { render, screen, waitFor, within } from "@testing-library/react";
-import { describe, expect, it, vi } from "vitest";
-import userEvent from "@testing-library/user-event";
-import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
-import { setupStore } from "test-utils";
-import { Provider } from "react-redux";
-import { createRoutesStub } from "react-router";
-import OpenHands from "#/api/open-hands";
-import { AuthProvider } from "#/context/auth-context";
-import { GitRepository } from "#/types/git";
-import * as GitService from "#/api/git";
-import { RepoConnector } from "#/components/features/home/repo-connector";
-
-const renderRepoConnector = (initialProvidersAreSet = true) => {
-  const mockRepoSelection = vi.fn();
-  const RouterStub = createRoutesStub([
-    {
-      Component: () => <RepoConnector onRepoSelection={mockRepoSelection} />,
-      path: "/",
-    },
-    {
-      Component: () => <div data-testid="conversation-screen" />,
-      path: "/conversations/:conversationId",
-    },
-    {
-      Component: () => <div data-testid="settings-screen" />,
-      path: "/settings",
-    },
-  ]);
-
-  return render(<RouterStub />, {
-    wrapper: ({ children }) => (
-      <Provider store={setupStore()}>
-        <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
-          <QueryClientProvider client={new QueryClient()}>
-            {children}
-          </QueryClientProvider>
-        </AuthProvider>
-      </Provider>
-    ),
-  });
-};
-
-const MOCK_RESPOSITORIES: GitRepository[] = [
-  {
-    id: 1,
-    full_name: "rbren/polaris",
-    git_provider: "github",
-    is_public: true,
-  },
-  {
-    id: 2,
-    full_name: "All-Hands-AI/OpenHands",
-    git_provider: "github",
-    is_public: true,
-  },
-];
-
-describe("RepoConnector", () => {
-  it("should render the repository connector section", () => {
-    renderRepoConnector();
-    screen.getByTestId("repo-connector");
-  });
-
-  it("should render the available repositories in the dropdown", async () => {
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderRepoConnector();
-
-    // Wait for the loading state to be replaced with the dropdown
-    const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
-    await userEvent.click(dropdown);
-
-    await waitFor(() => {
-      screen.getByText("rbren/polaris");
-      screen.getByText("All-Hands-AI/OpenHands");
-    });
-  });
-
-  it("should only enable the launch button if a repo is selected", async () => {
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderRepoConnector();
-
-    const launchButton = screen.getByTestId("repo-launch-button");
-    expect(launchButton).toBeDisabled();
-
-    // Wait for the loading state to be replaced with the dropdown
-    const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
-    await userEvent.click(dropdown);
-    await userEvent.click(screen.getByText("rbren/polaris"));
-
-    expect(launchButton).toBeEnabled();
-  });
-
-  it("should render the 'add git(hub|lab) repos' links if saas mode", async () => {
-    const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
-    // @ts-expect-error - only return the APP_MODE
-    getConfiSpy.mockResolvedValue({
-      APP_MODE: "saas",
-    });
-
-    renderRepoConnector();
-
-    await screen.findByText("Add GitHub repos");
-  });
-
-  it("should not render the 'add git(hub|lab) repos' links if oss mode", async () => {
-    const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
-    // @ts-expect-error - only return the APP_MODE
-    getConfiSpy.mockResolvedValue({
-      APP_MODE: "oss",
-    });
-
-    renderRepoConnector();
-
-    expect(screen.queryByText("Add GitHub repos")).not.toBeInTheDocument();
-    expect(screen.queryByText("Add GitLab repos")).not.toBeInTheDocument();
-  });
-
-  it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
-    const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderRepoConnector();
-
-    const repoConnector = screen.getByTestId("repo-connector");
-    const launchButton =
-      within(repoConnector).getByTestId("repo-launch-button");
-    await userEvent.click(launchButton);
-
-    // repo not selected yet
-    expect(createConversationSpy).not.toHaveBeenCalled();
-
-    // select a repository from the dropdown
-    const dropdown = await waitFor(() =>
-      within(repoConnector).getByTestId("repo-dropdown")
-    );
-    await userEvent.click(dropdown);
-
-    const repoOption = screen.getByText("rbren/polaris");
-    await userEvent.click(repoOption);
-    await userEvent.click(launchButton);
-
-    expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
-      {
-        full_name: "rbren/polaris",
-        git_provider: "github",
-        id: 1,
-        is_public: true,
-      },
-      undefined,
-      [],
-      undefined,
-    );
-  });
-
-  it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderRepoConnector();
-
-    const launchButton = screen.getByTestId("repo-launch-button");
-
-    // Wait for the loading state to be replaced with the dropdown
-    const dropdown = await waitFor(() => screen.getByTestId("repo-dropdown"));
-    await userEvent.click(dropdown);
-    await userEvent.click(screen.getByText("rbren/polaris"));
-
-    await userEvent.click(launchButton);
-    expect(launchButton).toBeDisabled();
-    expect(launchButton).toHaveTextContent(/Loading/i);
-  });
-
-  it("should not display a button to settings if the user is signed in with their git provider", async () => {
-    renderRepoConnector(true);
-    expect(
-      screen.queryByTestId("navigate-to-settings-button"),
-    ).not.toBeInTheDocument();
-  });
-
-  it("should display a button to settings if the user needs to sign in with their git provider", async () => {
-    renderRepoConnector(false);
-
-    const goToSettingsButton = await screen.findByTestId(
-      "navigate-to-settings-button",
-    );
-    const dropdown = screen.queryByTestId("repo-dropdown");
-    const launchButton = screen.queryByTestId("repo-launch-button");
-    const providerLinks = screen.queryAllByText(/add git(hub|lab) repos/i);
-
-    expect(dropdown).not.toBeInTheDocument();
-    expect(launchButton).not.toBeInTheDocument();
-    expect(providerLinks.length).toBe(0);
-
-    expect(goToSettingsButton).toBeInTheDocument();
-
-    await userEvent.click(goToSettingsButton);
-    await screen.findByTestId("settings-screen");
-  });
-});
--- a/frontend/tests/components/features/home/task-card.test.tsx
+++ b/frontend/tests/components/features/home/task-card.test.tsx
@@ -1,206 +0,0 @@
-import { render, screen } from "@testing-library/react";
-import { beforeEach, describe, expect, it, vi } from "vitest";
-import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
-import userEvent from "@testing-library/user-event";
-import { Provider } from "react-redux";
-import { createRoutesStub } from "react-router";
-import { setupStore } from "test-utils";
-import { SuggestedTask } from "#/components/features/home/tasks/task.types";
-import OpenHands from "#/api/open-hands";
-import { AuthProvider } from "#/context/auth-context";
-import { TaskCard } from "#/components/features/home/tasks/task-card";
-import * as GitService from "#/api/git";
-import { GitRepository } from "#/types/git";
-import {
-  getFailingChecksPrompt,
-  getMergeConflictPrompt,
-  getOpenIssuePrompt,
-  getUnresolvedCommentsPrompt,
-} from "#/components/features/home/tasks/get-prompt-for-query";
-
-const MOCK_TASK_1: SuggestedTask = {
-  issue_number: 123,
-  repo: "repo1",
-  title: "Task 1",
-  task_type: "MERGE_CONFLICTS",
-  git_provider: "github",
-};
-
-const MOCK_TASK_2: SuggestedTask = {
-  issue_number: 456,
-  repo: "repo2",
-  title: "Task 2",
-  task_type: "FAILING_CHECKS",
-  git_provider: "github",
-};
-
-const MOCK_TASK_3: SuggestedTask = {
-  issue_number: 789,
-  repo: "repo3",
-  title: "Task 3",
-  task_type: "UNRESOLVED_COMMENTS",
-  git_provider: "gitlab",
-};
-
-const MOCK_TASK_4: SuggestedTask = {
-  issue_number: 101112,
-  repo: "repo4",
-  title: "Task 4",
-  task_type: "OPEN_ISSUE",
-  git_provider: "gitlab",
-};
-
-const MOCK_RESPOSITORIES: GitRepository[] = [
-  { id: 1, full_name: "repo1", git_provider: "github", is_public: true },
-  { id: 2, full_name: "repo2", git_provider: "github", is_public: true },
-  { id: 3, full_name: "repo3", git_provider: "gitlab", is_public: true },
-  { id: 4, full_name: "repo4", git_provider: "gitlab", is_public: true },
-];
-
-const renderTaskCard = (task = MOCK_TASK_1) => {
-  const RouterStub = createRoutesStub([
-    {
-      Component: () => <TaskCard task={task} />,
-      path: "/",
-    },
-    {
-      Component: () => <div data-testid="conversation-screen" />,
-      path: "/conversations/:conversationId",
-    },
-  ]);
-
-  return render(<RouterStub />, {
-    wrapper: ({ children }) => (
-      <Provider store={setupStore()}>
-        <AuthProvider initialProvidersAreSet>
-          <QueryClientProvider client={new QueryClient()}>
-            {children}
-          </QueryClientProvider>
-        </AuthProvider>
-      </Provider>
-    ),
-  });
-};
-
-describe("TaskCard", () => {
-  it("format the issue id", async () => {
-    renderTaskCard();
-
-    const taskId = screen.getByTestId("task-id");
-    expect(taskId).toHaveTextContent(/#123/i);
-  });
-
-  it("should call createConversation when clicking the launch button", async () => {
-    const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-    renderTaskCard();
-
-    const launchButton = screen.getByTestId("task-launch-button");
-    await userEvent.click(launchButton);
-
-    expect(createConversationSpy).toHaveBeenCalled();
-  });
-
-  describe("creating conversation prompts", () => {
-    beforeEach(() => {
-      const retrieveUserGitRepositoriesSpy = vi.spyOn(
-        GitService,
-        "retrieveUserGitRepositories",
-      );
-      retrieveUserGitRepositoriesSpy.mockResolvedValue({
-        data: MOCK_RESPOSITORIES,
-        nextPage: null,
-      });
-    });
-
-    it("should call create conversation with the merge conflict prompt", async () => {
-      const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-      renderTaskCard(MOCK_TASK_1);
-
-      const launchButton = screen.getByTestId("task-launch-button");
-      await userEvent.click(launchButton);
-
-      expect(createConversationSpy).toHaveBeenCalledWith(
-        MOCK_RESPOSITORIES[0],
-        getMergeConflictPrompt(
-          MOCK_TASK_1.git_provider,
-          MOCK_TASK_1.issue_number,
-          MOCK_TASK_1.repo,
-        ),
-        [],
-        undefined,
-      );
-    });
-
-    it("should call create conversation with the failing checks prompt", async () => {
-      const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-      renderTaskCard(MOCK_TASK_2);
-
-      const launchButton = screen.getByTestId("task-launch-button");
-      await userEvent.click(launchButton);
-
-      expect(createConversationSpy).toHaveBeenCalledWith(
-        MOCK_RESPOSITORIES[1],
-        getFailingChecksPrompt(
-          MOCK_TASK_2.git_provider,
-          MOCK_TASK_2.issue_number,
-          MOCK_TASK_2.repo,
-        ),
-        [],
-        undefined,
-      );
-    });
-
-    it("should call create conversation with the unresolved comments prompt", async () => {
-      const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-      renderTaskCard(MOCK_TASK_3);
-
-      const launchButton = screen.getByTestId("task-launch-button");
-      await userEvent.click(launchButton);
-
-      expect(createConversationSpy).toHaveBeenCalledWith(
-        MOCK_RESPOSITORIES[2],
-        getUnresolvedCommentsPrompt(
-          MOCK_TASK_3.git_provider,
-          MOCK_TASK_3.issue_number,
-          MOCK_TASK_3.repo,
-        ),
-        [],
-        undefined,
-      );
-    });
-
-    it("should call create conversation with the open issue prompt", async () => {
-      const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
-
-      renderTaskCard(MOCK_TASK_4);
-
-      const launchButton = screen.getByTestId("task-launch-button");
-      await userEvent.click(launchButton);
-
-      expect(createConversationSpy).toHaveBeenCalledWith(
-        MOCK_RESPOSITORIES[3],
-        getOpenIssuePrompt(
-          MOCK_TASK_4.git_provider,
-          MOCK_TASK_4.issue_number,
-          MOCK_TASK_4.repo,
-        ),
-        [],
-        undefined,
-      );
-    });
-  });
-
-  it("should disable the launch button and update text content when creating a conversation", async () => {
-    renderTaskCard();
-
-    const launchButton = screen.getByTestId("task-launch-button");
-    await userEvent.click(launchButton);
-
-    expect(launchButton).toHaveTextContent(/Loading/i);
-    expect(launchButton).toBeDisabled();
-  });
-});
--- a/frontend/tests/components/features/home/task-suggestions.test.tsx
+++ b/frontend/tests/components/features/home/task-suggestions.test.tsx
@@ -1,113 +0,0 @@
-import { render, screen, waitFor } from "@testing-library/react";
-import { afterEach, describe, expect, it, vi } from "vitest";
-import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
-import { Provider } from "react-redux";
-import { createRoutesStub } from "react-router";
-import { setupStore } from "test-utils";
-import userEvent from "@testing-library/user-event";
-import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
-import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
-import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
-import { AuthProvider } from "#/context/auth-context";
-
-const renderTaskSuggestions = (initialProvidersAreSet = true) => {
-  const RouterStub = createRoutesStub([
-    {
-      Component: TaskSuggestions,
-      path: "/",
-    },
-    {
-      Component: () => <div data-testid="conversation-screen" />,
-      path: "/conversations/:conversationId",
-    },
-    {
-      Component: () => <div data-testid="settings-screen" />,
-      path: "/settings",
-    },
-  ]);
-
-  return render(<RouterStub />, {
-    wrapper: ({ children }) => (
-      <Provider store={setupStore()}>
-        <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
-          <QueryClientProvider client={new QueryClient()}>
-            {children}
-          </QueryClientProvider>
-        </AuthProvider>
-      </Provider>
-    ),
-  });
-};
-
-describe("TaskSuggestions", () => {
-  const getSuggestedTasksSpy = vi.spyOn(
-    SuggestionsService,
-    "getSuggestedTasks",
-  );
-
-  afterEach(() => {
-    vi.clearAllMocks();
-  });
-
-  it("should render the task suggestions section", () => {
-    renderTaskSuggestions();
-    screen.getByTestId("task-suggestions");
-  });
-
-  it("should render an empty message if there are no tasks", async () => {
-    getSuggestedTasksSpy.mockResolvedValue([]);
-    renderTaskSuggestions();
-    await screen.findByText(/No tasks available/i);
-  });
-
-  it("should render the task groups with the correct titles", async () => {
-    getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
-    renderTaskSuggestions();
-
-    await waitFor(() => {
-      MOCK_TASKS.forEach((taskGroup) => {
-        screen.getByText(taskGroup.title);
-      });
-    });
-  });
-
-  it("should render the task cards with the correct task details", async () => {
-    getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
-    renderTaskSuggestions();
-
-    await waitFor(() => {
-      MOCK_TASKS.forEach((task) => {
-        screen.getByText(task.title);
-      });
-    });
-  });
-
-  it("should render skeletons when loading", async () => {
-    getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
-    renderTaskSuggestions();
-
-    const skeletons = screen.getAllByTestId("task-group-skeleton");
-    expect(skeletons.length).toBeGreaterThan(0);
-
-    await waitFor(() => {
-      MOCK_TASKS.forEach((taskGroup) => {
-        screen.getByText(taskGroup.title);
-      });
-    });
-
-    expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
-  });
-
-  it("should display a button to settings if the user needs to sign in with their git provider", async () => {
-    renderTaskSuggestions(false);
-
-    expect(getSuggestedTasksSpy).not.toHaveBeenCalled();
-    const goToSettingsButton = await screen.findByTestId(
-      "navigate-to-settings-button",
-    );
-    expect(goToSettingsButton).toBeInTheDocument();
-
-    await userEvent.click(goToSettingsButton);
-    await screen.findByTestId("settings-screen");
-  });
-});
--- a/frontend/tests/components/features/payment/payment-form.test.tsx
+++ b/frontend/tests/components/features/payment/payment-form.test.tsx
@@ -61,25 +61,25 @@ describe("PaymentForm", () => {
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50");
+    await user.type(topUpInput, "50.12");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);

-    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
+    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.12);
  });

-  it("should only accept integer values", async () => {
+  it("should round the top-up amount to two decimal places", async () => {
    const user = userEvent.setup();
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50");
+    await user.type(topUpInput, "50.125456");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);

-    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
+    expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.13);
  });

  it("should disable the top-up button if the user enters an invalid amount", async () => {
@@ -100,7 +100,7 @@ describe("PaymentForm", () => {
    renderPaymentForm();

    const topUpInput = await screen.findByTestId("top-up-input");
-    await user.type(topUpInput, "50");
+    await user.type(topUpInput, "50.12");

    const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
    await user.click(topUpButton);
@@ -114,7 +114,7 @@ describe("PaymentForm", () => {
      renderPaymentForm();

      const topUpInput = await screen.findByTestId("top-up-input");
-      await user.type(topUpInput, "-50");
+      await user.type(topUpInput, "-50.12");

      const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
      await user.click(topUpButton);
@@ -139,8 +139,6 @@ describe("PaymentForm", () => {
      const user = userEvent.setup();
      renderPaymentForm();

-      // With type="number", the browser would prevent non-numeric input,
-      // but we'll test the validation logic anyway
      const topUpInput = await screen.findByTestId("top-up-input");
      await user.type(topUpInput, "abc");

@@ -162,19 +160,5 @@ describe("PaymentForm", () => {

      expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
    });
-
-    test("user enters a decimal value", async () => {
-      const user = userEvent.setup();
-      renderPaymentForm();
-
-      // With step="1", the browser would validate this, but we'll test our validation logic
-      const topUpInput = await screen.findByTestId("top-up-input");
-      await user.type(topUpInput, "50.5");
-
-      const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
-      await user.click(topUpButton);
-
-      expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
-    });
  });
 });
--- a/frontend/tests/components/file-operations.test.tsx
+++ b/frontend/tests/components/file-operations.test.tsx
@@ -1,18 +1,9 @@
 import { render, screen } from "@testing-library/react";
-import { describe, it, expect, vi } from "vitest";
+import { describe, it, expect } from "vitest";
 import { Messages } from "#/components/features/chat/messages";
 import type { Message } from "#/message";
 import { renderWithProviders } from "test-utils";

-// Mock the useParams hook to provide a conversationId
-vi.mock("react-router", async () => {
-  const actual = await vi.importActual<typeof import("react-router")>("react-router");
-  return {
-    ...actual,
-    useParams: () => ({ conversationId: "test-conversation-id" }),
-  };
-});
-
 describe("File Operations Messages", () => {
  it("should show success indicator for successful file read operation", () => {
    const messages: Message[] = [
--- a/frontend/tests/components/terminal/terminal.test.tsx
+++ b/frontend/tests/components/terminal/terminal.test.tsx
@@ -5,7 +5,7 @@ import { Command, appendInput, appendOutput } from "#/state/command-slice";
 import Terminal from "#/components/features/terminal/terminal";

 const renderTerminal = (commands: Command[] = []) =>
-  renderWithProviders(<Terminal />, {
+  renderWithProviders(<Terminal secrets={[]} />, {
    preloadedState: {
      cmd: {
        commands,
@@ -121,7 +121,7 @@ describe.skip("Terminal", () => {

  // This test fails because it expects `disposeMock` to have been called before the component is unmounted.
  it.skip("should dispose the terminal on unmount", () => {
-    const { unmount } = renderWithProviders(<Terminal />);
+    const { unmount } = renderWithProviders(<Terminal secrets={[]} />);

    expect(mockTerminal.dispose).not.toHaveBeenCalled();

--- a/frontend/tests/hooks/use-terminal.test.tsx
+++ b/frontend/tests/hooks/use-terminal.test.tsx
@@ -1,31 +1,31 @@
 import { beforeAll, describe, expect, it, vi } from "vitest";
+import { render } from "@testing-library/react";
 import { afterEach } from "node:test";
+import { ReactNode } from "react";
 import { useTerminal } from "#/hooks/use-terminal";
 import { Command } from "#/state/command-slice";
-import { AgentState } from "#/types/agent-state";
-import { renderWithProviders } from "../../test-utils";
-
-// Mock the WsClient context
-vi.mock("#/context/ws-client-provider", () => ({
-  useWsClient: () => ({
-    send: vi.fn(),
-    status: "CONNECTED",
-    isLoadingMessages: false,
-    events: [],
-  }),
-}));

 interface TestTerminalComponentProps {
  commands: Command[];
+  secrets: string[];
 }

 function TestTerminalComponent({
  commands,
+  secrets,
 }: TestTerminalComponentProps) {
-  const ref = useTerminal({ commands });
+  const ref = useTerminal({ commands, secrets, disabled: false });
  return <div ref={ref} />;
 }

+interface WrapperProps {
+  children: ReactNode;
+}
+// Bare <div> pass-through handed to `render` through its `wrapper` option.
+function Wrapper(props: WrapperProps) {
+  return <div>{props.children}</div>;
+}
+
 describe("useTerminal", () => {
  const mockTerminal = vi.hoisted(() => ({
    loadAddon: vi.fn(),
@@ -57,11 +57,8 @@ describe("useTerminal", () => {
  });

  it("should render", () => {
-    renderWithProviders(<TestTerminalComponent commands={[]} />, {
-      preloadedState: {
-        agent: { curAgentState: AgentState.RUNNING },
-        cmd: { commands: [] },
-      },
+    render(<TestTerminalComponent commands={[]} secrets={[]} />, {
+      wrapper: Wrapper,
    });
  });

@@ -71,19 +68,15 @@ describe("useTerminal", () => {
      { content: "hello", type: "output" },
    ];

-    renderWithProviders(<TestTerminalComponent commands={commands} />, {
-      preloadedState: {
-        agent: { curAgentState: AgentState.RUNNING },
-        cmd: { commands },
-      },
+    render(<TestTerminalComponent commands={commands} secrets={[]} />, {
+      wrapper: Wrapper,
    });

    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(1, "echo hello");
    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(2, "hello");
  });

-  // This test is no longer relevant as secrets filtering has been removed
-  it.skip("should hide secrets in the terminal", () => {
+  it("should hide secrets in the terminal", () => {
    const secret = "super_secret_github_token";
    const anotherSecret = "super_secret_another_token";
    const commands: Command[] = [
@@ -94,18 +87,23 @@ describe("useTerminal", () => {
      { content: secret, type: "output" },
    ];

-    renderWithProviders(
+    render(
      <TestTerminalComponent
        commands={commands}
+        secrets={[secret, anotherSecret]}
      />,
      {
-        preloadedState: {
-          agent: { curAgentState: AgentState.RUNNING },
-          cmd: { commands },
-        },
+        wrapper: Wrapper,
      },
    );

-    // This test is no longer relevant as secrets filtering has been removed
+    // BUG: `vi.clearAllMocks()` does not clear the recorded calls between tests,
+    // so the call indices asserted below (3 and 4) depend on the test order.
+    // NOTE(review): `afterEach` is imported from "node:test", not vitest — possibly the cause; confirm.
+    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(
+      3,
+      `export GITHUB_TOKEN=${"*".repeat(10)},${"*".repeat(10)},${"*".repeat(10)}`,
+    );
+    expect(mockTerminal.writeln).toHaveBeenNthCalledWith(4, "*".repeat(10));
  });
 });
--- a/frontend/tests/routes/home-screen.test.tsx
+++ b/frontend/tests/routes/home-screen.test.tsx
@@ -1,370 +0,0 @@
-import { render, screen, waitFor, within } from "@testing-library/react";
-import { beforeEach, describe, expect, it, vi } from "vitest";
-import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
-import userEvent from "@testing-library/user-event";
-import { createRoutesStub } from "react-router";
-import { Provider } from "react-redux";
-import { setupStore } from "test-utils";
-import { AxiosError } from "axios";
-import HomeScreen from "#/routes/home";
-import { AuthProvider } from "#/context/auth-context";
-import * as GitService from "#/api/git";
-import { GitRepository } from "#/types/git";
-import OpenHands from "#/api/open-hands";
-import MainApp from "#/routes/root-layout";
-
-const createAxiosNotFoundErrorObject = () =>
-  new AxiosError(
-    "Request failed with status code 404",
-    "ERR_BAD_REQUEST",
-    undefined,
-    undefined,
-    {
-      status: 404,
-      statusText: "Not Found",
-      data: { message: "Settings not found" },
-      headers: {},
-      // @ts-expect-error - we only need the response object for this test
-      config: {},
-    },
-  );
-
-const RouterStub = createRoutesStub([
-  {
-    Component: MainApp,
-    path: "/",
-    children: [
-      {
-        Component: HomeScreen,
-        path: "/",
-      },
-      {
-        Component: () => <div data-testid="conversation-screen" />,
-        path: "/conversations/:conversationId",
-      },
-      {
-        Component: () => <div data-testid="settings-screen" />,
-        path: "/settings",
-      },
-    ],
-  },
-]);
-
-const renderHomeScreen = (initialProvidersAreSet = true) =>
-  render(<RouterStub />, {
-    wrapper: ({ children }) => (
-      <Provider store={setupStore()}>
-        <AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
-          <QueryClientProvider client={new QueryClient()}>
-            {children}
-          </QueryClientProvider>
-        </AuthProvider>
-      </Provider>
-    ),
-  });
-
-const MOCK_RESPOSITORIES: GitRepository[] = [
-  {
-    id: 1,
-    full_name: "octocat/hello-world",
-    git_provider: "github",
-    is_public: true,
-  },
-  {
-    id: 2,
-    full_name: "octocat/earth",
-    git_provider: "github",
-    is_public: true,
-  },
-];
-
-describe("HomeScreen", () => {
-  it("should render", () => {
-    renderHomeScreen();
-    screen.getByTestId("home-screen");
-  });
-
-  it("should render the repository connector and suggested tasks sections", async () => {
-    renderHomeScreen();
-
-    screen.getByTestId("repo-connector");
-    screen.getByTestId("task-suggestions");
-  });
-
-  it("should filter the suggested tasks based on the selected repository", async () => {
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderHomeScreen();
-
-    const taskSuggestions = screen.getByTestId("task-suggestions");
-
-    // Initially, all tasks should be visible
-    await waitFor(() => {
-      within(taskSuggestions).getByText("octocat/hello-world");
-      within(taskSuggestions).getByText("octocat/earth");
-    });
-
-    // Select a repository from the dropdown
-    const repoConnector = screen.getByTestId("repo-connector");
-
-    const dropdown = within(repoConnector).getByTestId("repo-dropdown");
-    await userEvent.click(dropdown);
-
-    const repoOption = screen.getAllByText("octocat/hello-world")[1];
-    await userEvent.click(repoOption);
-
-    // After selecting a repository, only tasks related to that repository should be visible
-    await waitFor(() => {
-      within(taskSuggestions).getByText("octocat/hello-world");
-      expect(
-        within(taskSuggestions).queryByText("octocat/earth"),
-      ).not.toBeInTheDocument();
-    });
-  });
-
-  it("should reset the filtered tasks when the selected repository is cleared", async () => {
-    const retrieveUserGitRepositoriesSpy = vi.spyOn(
-      GitService,
-      "retrieveUserGitRepositories",
-    );
-    retrieveUserGitRepositoriesSpy.mockResolvedValue({
-      data: MOCK_RESPOSITORIES,
-      nextPage: null,
-    });
-
-    renderHomeScreen();
-
-    const taskSuggestions = screen.getByTestId("task-suggestions");
-
-    // Initially, all tasks should be visible
-    await waitFor(() => {
-      within(taskSuggestions).getByText("octocat/hello-world");
-      within(taskSuggestions).getByText("octocat/earth");
-    });
-
-    // Select a repository from the dropdown
-    const repoConnector = screen.getByTestId("repo-connector");
-
-    const dropdown = within(repoConnector).getByTestId("repo-dropdown");
-    await userEvent.click(dropdown);
-
-    const repoOption = screen.getAllByText("octocat/hello-world")[1];
-    await userEvent.click(repoOption);
-
-    // After selecting a repository, only tasks related to that repository should be visible
-    await waitFor(() => {
-      within(taskSuggestions).getByText("octocat/hello-world");
-      expect(
-        within(taskSuggestions).queryByText("octocat/earth"),
-      ).not.toBeInTheDocument();
-    });
-
-    // Clear the selected repository
-    await userEvent.clear(dropdown);
-
-    // All tasks should be visible again
-    await waitFor(() => {
-      within(taskSuggestions).getByText("octocat/hello-world");
-      within(taskSuggestions).getByText("octocat/earth");
-    });
-  });
-
-  describe("launch buttons", () => {
-    const setupLaunchButtons = async () => {
-      let headerLaunchButton = screen.getByTestId("header-launch-button");
-      let repoLaunchButton = screen.getByTestId("repo-launch-button");
-      let tasksLaunchButtons =
-        await screen.findAllByTestId("task-launch-button");
-
-      // Select a repository from the dropdown to enable the repo launch button
-      const repoConnector = screen.getByTestId("repo-connector");
-      const dropdown = within(repoConnector).getByTestId("repo-dropdown");
-      await userEvent.click(dropdown);
-      const repoOption = screen.getAllByText("octocat/hello-world")[1];
-      await userEvent.click(repoOption);
-
-      expect(headerLaunchButton).not.toBeDisabled();
-      expect(repoLaunchButton).not.toBeDisabled();
-      tasksLaunchButtons.forEach((button) => {
-        expect(button).not.toBeDisabled();
-      });
-
-      headerLaunchButton = screen.getByTestId("header-launch-button");
-      repoLaunchButton = screen.getByTestId("repo-launch-button");
-      tasksLaunchButtons = await screen.findAllByTestId("task-launch-button");
-
-      return {
-        headerLaunchButton,
-        repoLaunchButton,
-        tasksLaunchButtons,
-      };
-    };
-
-    beforeEach(() => {
-      const retrieveUserGitRepositoriesSpy = vi.spyOn(
-        GitService,
-        "retrieveUserGitRepositories",
-      );
-      retrieveUserGitRepositoriesSpy.mockResolvedValue({
-        data: MOCK_RESPOSITORIES,
-        nextPage: null,
-      });
-    });
-
-    it("should disable the other launch buttons when the header launch button is clicked", async () => {
-      renderHomeScreen();
-      const { headerLaunchButton, repoLaunchButton } =
-        await setupLaunchButtons();
-
-      const tasksLaunchButtonsAfter =
-        await screen.findAllByTestId("task-launch-button");
-
-      // All other buttons should be disabled when the header button is clicked
-      await userEvent.click(headerLaunchButton);
-
-      expect(headerLaunchButton).toBeDisabled();
-      expect(repoLaunchButton).toBeDisabled();
-      tasksLaunchButtonsAfter.forEach((button) => {
-        expect(button).toBeDisabled();
-      });
-    });
-
-    it("should disable the other launch buttons when the repo launch button is clicked", async () => {
-      renderHomeScreen();
-      const { headerLaunchButton, repoLaunchButton } =
-        await setupLaunchButtons();
-
-      const tasksLaunchButtonsAfter =
-        await screen.findAllByTestId("task-launch-button");
-
-      // All other buttons should be disabled when the repo button is clicked
-      await userEvent.click(repoLaunchButton);
-
-      expect(headerLaunchButton).toBeDisabled();
-      expect(repoLaunchButton).toBeDisabled();
-      tasksLaunchButtonsAfter.forEach((button) => {
-        expect(button).toBeDisabled();
-      });
-    });
-
-    it("should disable the other launch buttons when any task launch button is clicked", async () => {
-      renderHomeScreen();
-      const { headerLaunchButton, repoLaunchButton, tasksLaunchButtons } =
-        await setupLaunchButtons();
-
-      const tasksLaunchButtonsAfter =
-        await screen.findAllByTestId("task-launch-button");
-
-      // All other buttons should be disabled when the task button is clicked
-      await userEvent.click(tasksLaunchButtons[0]);
-
-      expect(headerLaunchButton).toBeDisabled();
-      expect(repoLaunchButton).toBeDisabled();
-      tasksLaunchButtonsAfter.forEach((button) => {
-        expect(button).toBeDisabled();
-      });
-    });
-  });
-
-  it("should hide the suggested tasks section if not authed with git(hub|lab)", async () => {
-    renderHomeScreen(false);
-
-    const taskSuggestions = screen.queryByTestId("task-suggestions");
-    const repoConnector = screen.getByTestId("repo-connector");
-
-    expect(taskSuggestions).not.toBeInTheDocument();
-    expect(repoConnector).toBeInTheDocument();
-  });
-});
-
-describe("Settings 404", () => {
-  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
-  const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
-
-  it("should open the settings modal if GET /settings fails with a 404", async () => {
-    const error = createAxiosNotFoundErrorObject();
-    getSettingsSpy.mockRejectedValue(error);
-
-    renderHomeScreen();
-
-    const settingsModal = await screen.findByTestId("ai-config-modal");
-    expect(settingsModal).toBeInTheDocument();
-  });
-
-  it("should navigate to the settings screen when clicking the advanced settings button", async () => {
-    const error = createAxiosNotFoundErrorObject();
-    getSettingsSpy.mockRejectedValue(error);
-
-    const user = userEvent.setup();
-    renderHomeScreen();
-
-    const settingsScreen = screen.queryByTestId("settings-screen");
-    expect(settingsScreen).not.toBeInTheDocument();
-
-    const settingsModal = await screen.findByTestId("ai-config-modal");
-    expect(settingsModal).toBeInTheDocument();
-
-    const advancedSettingsButton = await screen.findByTestId(
-      "advanced-settings-link",
-    );
-    await user.click(advancedSettingsButton);
-
-    const settingsScreenAfter = await screen.findByTestId("settings-screen");
-    expect(settingsScreenAfter).toBeInTheDocument();
-
-    const settingsModalAfter = screen.queryByTestId("ai-config-modal");
-    expect(settingsModalAfter).not.toBeInTheDocument();
-  });
-
-  it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
-    // @ts-expect-error - we only need APP_MODE for this test
-    getConfigSpy.mockResolvedValue({
-      APP_MODE: "saas",
-      FEATURE_FLAGS: {
-        ENABLE_BILLING: false,
-        HIDE_LLM_SETTINGS: false,
-      },
-    });
-    const error = createAxiosNotFoundErrorObject();
-    getSettingsSpy.mockRejectedValue(error);
-
-    renderHomeScreen();
-
-    // small hack to wait for the modal to not appear
-    await expect(
-      screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
-    ).rejects.toThrow();
-  });
-});
-
-describe("Setup Payment modal", () => {
-  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
-  const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
-
-  it("should only render if SaaS mode and is new user", async () => {
-    // @ts-expect-error - we only need the APP_MODE for this test
-    getConfigSpy.mockResolvedValue({
-      APP_MODE: "saas",
-      FEATURE_FLAGS: {
-        ENABLE_BILLING: true,
-        HIDE_LLM_SETTINGS: false,
-      },
-    });
-    const error = createAxiosNotFoundErrorObject();
-    getSettingsSpy.mockRejectedValue(error);
-
-    renderHomeScreen();
-
-    const setupPaymentModal = await screen.findByTestId(
-      "proceed-to-stripe-button",
-    );
-    expect(setupPaymentModal).toBeInTheDocument();
-  });
-});
--- a/frontend/tests/routes/home.test.tsx
+++ b/frontend/tests/routes/home.test.tsx
@@ -0,0 +1,177 @@
+import { createRoutesStub } from "react-router";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { renderWithProviders } from "test-utils";
+import userEvent from "@testing-library/user-event";
+import { screen } from "@testing-library/react";
+import { AxiosError } from "axios";
+import MainApp from "#/routes/root-layout";
+import SettingsScreen from "#/routes/settings";
+import Home from "#/routes/home";
+import OpenHands from "#/api/open-hands";
+
+const createAxiosNotFoundErrorObject = () => // 404 AxiosError used below to make getSettings reject
+  new AxiosError( // positional args: message, code, config, request, response
+    "Request failed with status code 404",
+    "ERR_BAD_REQUEST",
+    undefined, // config: not needed by these tests
+    undefined, // request: not needed by these tests
+    { // response: the only part the tests rely on
+      status: 404,
+      statusText: "Not Found",
+      data: { message: "Settings not found" },
+      headers: {},
+      // @ts-expect-error - we only need the response object for this test
+      config: {},
+    },
+  );
+
+const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
+
+const RouterStub = createRoutesStub([
+  {
+    // layout route (parent of the child routes below)
+    Component: MainApp,
+    path: "/",
+    children: [
+      {
+        // home route
+        Component: Home,
+        path: "/",
+      },
+      {
+        Component: SettingsScreen, // navigation tests below wait for the "settings-screen" test id after routing here
+        path: "/settings",
+      },
+    ],
+  },
+]);
+
+afterEach(() => {
+  vi.clearAllMocks(); // clears recorded calls/results only; mocked implementations stay in place
+});
+
+describe("Home Screen", () => {
+  const getConfigSpy = vi.spyOn(OpenHands, "getConfig"); // only stubbed by the last test in this suite
+
+  it("should render the home screen", () => {
+    renderWithProviders(<RouterStub initialEntries={["/"]} />); // smoke test: no assertions, passes as long as rendering does not throw
+  });
+
+  it("should navigate to the settings screen when the settings button is clicked", async () => {
+    const user = userEvent.setup();
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+    // findByTestId retries until the element appears, so it waits out the async render.
+    const settingsButton = await screen.findByTestId("settings-button");
+    await user.click(settingsButton);
+
+    const settingsScreen = await screen.findByTestId("settings-screen");
+    expect(settingsScreen).toBeInTheDocument();
+  });
+
+  it("should navigate to the settings when pressing 'Connect to GitHub' if the user isn't authenticated", async () => {
+    // @ts-expect-error - we only need APP_MODE for this test
+    getConfigSpy.mockResolvedValue({
+      APP_MODE: "oss",
+      FEATURE_FLAGS: {
+        ENABLE_BILLING: false,
+        HIDE_LLM_SETTINGS: false,
+      },
+    });
+    const user = userEvent.setup();
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+    // Same flow as the previous test, entered through the "connect-to-github" button.
+    const connectToGitHubButton =
+      await screen.findByTestId("connect-to-github");
+    await user.click(connectToGitHubButton);
+
+    const settingsScreen = await screen.findByTestId("settings-screen");
+    expect(settingsScreen).toBeInTheDocument();
+  });
+});
+
+describe("Settings 404", () => {
+  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
+  // Every test here makes getSettings reject with the 404 AxiosError before rendering.
+  it("should open the settings modal if GET /settings fails with a 404", async () => {
+    const error = createAxiosNotFoundErrorObject();
+    getSettingsSpy.mockRejectedValue(error);
+
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+
+    const settingsModal = await screen.findByTestId("ai-config-modal");
+    expect(settingsModal).toBeInTheDocument();
+  });
+
+  it("should navigate to the settings screen when clicking the advanced settings button", async () => {
+    const error = createAxiosNotFoundErrorObject();
+    getSettingsSpy.mockRejectedValue(error);
+
+    const user = userEvent.setup();
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+    // Sanity check: the settings screen is not rendered before any navigation happens.
+    const settingsScreen = screen.queryByTestId("settings-screen");
+    expect(settingsScreen).not.toBeInTheDocument();
+
+    const settingsModal = await screen.findByTestId("ai-config-modal");
+    expect(settingsModal).toBeInTheDocument();
+
+    const advancedSettingsButton = await screen.findByTestId(
+      "advanced-settings-link",
+    );
+    await user.click(advancedSettingsButton);
+
+    const settingsScreenAfter = await screen.findByTestId("settings-screen");
+    expect(settingsScreenAfter).toBeInTheDocument();
+    // After navigating, the modal must no longer be in the document.
+    const settingsModalAfter = screen.queryByTestId("ai-config-modal");
+    expect(settingsModalAfter).not.toBeInTheDocument();
+  });
+
+  it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
+    // @ts-expect-error - we only need APP_MODE for this test
+    getConfigSpy.mockResolvedValue({
+      APP_MODE: "saas",
+      FEATURE_FLAGS: {
+        ENABLE_BILLING: false,
+        HIDE_LLM_SETTINGS: false,
+      },
+    });
+    const error = createAxiosNotFoundErrorObject();
+    getSettingsSpy.mockRejectedValue(error);
+
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+
+    // small hack: findByTestId rejects once its 1000 ms timeout expires, showing the modal never appeared
+    await expect(
+      screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
+    ).rejects.toThrow();
+  });
+});
+
+describe("Setup Payment modal", () => {
+  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
+
+  afterEach(() => {
+    vi.resetAllMocks(); // unlike the file-level clearAllMocks, this also resets mock implementations
+  });
+
+  it("should only render if SaaS mode and is new user", async () => {
+    // @ts-expect-error - we only need the APP_MODE for this test
+    getConfigSpy.mockResolvedValue({
+      APP_MODE: "saas",
+      FEATURE_FLAGS: {
+        ENABLE_BILLING: true,
+        HIDE_LLM_SETTINGS: false,
+      },
+    });
+    const error = createAxiosNotFoundErrorObject();
+    getSettingsSpy.mockRejectedValue(error);
+    // SaaS + billing enabled + settings 404 (presumably how a "new user" is modelled — confirm).
+    renderWithProviders(<RouterStub initialEntries={["/"]} />);
+    // The modal is detected through its "proceed-to-stripe-button" test id.
+    const setupPaymentModal = await screen.findByTestId(
+      "proceed-to-stripe-button",
+    );
+    expect(setupPaymentModal).toBeInTheDocument();
+  });
+});
--- a/frontend/tests/routes/settings-with-payment.test.tsx
+++ b/frontend/tests/routes/settings-with-payment.test.tsx
@@ -43,12 +43,10 @@ describe("Settings Billing", () => {

    renderSettingsScreen();

-    // Wait for the settings screen to be rendered
-    await screen.findByTestId("settings-screen");
-
-    // Then check that the navbar is not present
-    const navbar = screen.queryByTestId("settings-navbar");
-    expect(navbar).not.toBeInTheDocument();
+    await waitFor(() => {
+      const navbar = screen.queryByTestId("settings-navbar");
+      expect(navbar).not.toBeInTheDocument();
+    });
  });

  it("should render the navbar if SaaS mode", async () => {
--- a/frontend/tests/routes/settings.test.tsx
+++ b/frontend/tests/routes/settings.test.tsx
@@ -25,6 +25,7 @@ const mock_provider_tokens_are_set: Record<Provider, boolean> = {
 describe("Settings Screen", () => {
  const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
  const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
+  const resetSettingsSpy = vi.spyOn(OpenHands, "resetSettings");
  const getConfigSpy = vi.spyOn(OpenHands, "getConfig");

  const { handleLogoutMock } = vi.hoisted(() => ({
@@ -66,6 +67,7 @@ describe("Settings Screen", () => {
      // Use queryAllByText to handle multiple elements with the same text
      expect(screen.queryAllByText("SETTINGS$LLM_SETTINGS")).not.toHaveLength(0);
      screen.getByText("ACCOUNT_SETTINGS$ADDITIONAL_SETTINGS");
+      screen.getByText("BUTTON$RESET_TO_DEFAULTS");
      screen.getByText("BUTTON$SAVE");
    });
  });
@@ -540,6 +542,54 @@ describe("Settings Screen", () => {
        });
      });

+      test("resetting settings with no changes but having advanced enabled should hide the advanced items", async () => {
+        const user = userEvent.setup();
+        // First settings fetch: a plain copy of the default user settings
+        getSettingsSpy.mockResolvedValueOnce({
+          ...MOCK_DEFAULT_USER_SETTINGS,
+        });
+
+        renderSettingsScreen();
+        // Helper not shown in this hunk; presumably switches the advanced view on - confirm
+        await toggleAdvancedSettings(user);
+
+        const resetButton = screen.getByText("BUTTON$RESET_TO_DEFAULTS");
+        await user.click(resetButton);
+
+        // Clicking the reset button should open the confirmation modal
+        const modal = await screen.findByTestId("reset-modal");
+        expect(modal).toBeInTheDocument();
+
+        // Mock the settings that will be returned after reset
+        // This should be the default settings with no advanced settings enabled
+        getSettingsSpy.mockResolvedValueOnce({
+          ...MOCK_DEFAULT_USER_SETTINGS,
+          llm_base_url: "",
+          confirmation_mode: false,
+          security_analyzer: "",
+        });
+
+        // Confirm the reset from inside the modal
+        const confirmButton = within(modal).getByText("Reset");
+        await user.click(confirmButton);
+        // Poll until every advanced item queried below is gone from the document
+        await waitFor(() => {
+          expect(
+            screen.queryByTestId("llm-custom-model-input"),
+          ).not.toBeInTheDocument();
+          expect(
+            screen.queryByTestId("base-url-input"),
+          ).not.toBeInTheDocument();
+          expect(screen.queryByTestId("agent-input")).not.toBeInTheDocument();
+          expect(
+            screen.queryByTestId("security-analyzer-input"),
+          ).not.toBeInTheDocument();
+          expect(
+            screen.queryByTestId("enable-confirmation-mode-switch"),
+          ).not.toBeInTheDocument();
+        });
+      });
+
      it("should save if only confirmation mode is enabled", async () => {
        const user = userEvent.setup();
        renderSettingsScreen();
@@ -712,6 +762,81 @@ describe("Settings Screen", () => {
      );
    });

+    it("should reset the settings when the 'Reset to defaults' button is clicked", async () => {
+      const user = userEvent.setup();
+      getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
+
+      renderSettingsScreen();
+
+      // Make an unsaved change first so the reset has something to discard
+      const languageInput = await screen.findByTestId("language-input");
+      await user.click(languageInput);
+      const norskOption = await screen.findByText("Norsk");
+      await user.click(norskOption);
+      expect(languageInput).toHaveValue("Norsk");
+
+      const resetButton = screen.getByText("BUTTON$RESET_TO_DEFAULTS");
+      await user.click(resetButton);
+
+      expect(saveSettingsSpy).not.toHaveBeenCalled();
+
+      // show modal
+      const modal = await screen.findByTestId("reset-modal");
+      expect(modal).toBeInTheDocument();
+
+      // Queue the post-reset settings BEFORE confirming, so any settings refetch that
+      // follows the reset cannot run before this one-shot mock is registered
+      getSettingsSpy.mockResolvedValueOnce({
+        ...MOCK_DEFAULT_USER_SETTINGS,
+        llm_base_url: "",
+        confirmation_mode: false,
+        security_analyzer: "",
+      });
+
+      // confirm reset
+      const confirmButton = within(modal).getByText("Reset");
+      await user.click(confirmButton);
+
+      await waitFor(() => {
+        expect(resetSettingsSpy).toHaveBeenCalled();
+      });
+
+      // Wait for the mutation to complete and the modal to be removed
+      await waitFor(() => {
+        expect(screen.queryByTestId("reset-modal")).not.toBeInTheDocument();
+        expect(
+          screen.queryByTestId("llm-custom-model-input"),
+        ).not.toBeInTheDocument();
+        expect(screen.queryByTestId("base-url-input")).not.toBeInTheDocument();
+        expect(screen.queryByTestId("agent-input")).not.toBeInTheDocument();
+        expect(
+          screen.queryByTestId("security-analyzer-input"),
+        ).not.toBeInTheDocument();
+        expect(
+          screen.queryByTestId("enable-confirmation-mode-switch"),
+        ).not.toBeInTheDocument();
+      });
+    });
+
+    it("should cancel the reset when the 'Cancel' button is clicked", async () => {
+      const user = userEvent.setup();
+      getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
+      renderSettingsScreen();
+
+      const resetButton = await screen.findByText("BUTTON$RESET_TO_DEFAULTS");
+      await user.click(resetButton);
+
+      const modal = await screen.findByTestId("reset-modal");
+      expect(modal).toBeInTheDocument();
+
+      const cancelButton = within(modal).getByText("Cancel");
+      await user.click(cancelButton);
+      // Cancelling must close the modal without saving or resetting anything
+      expect(saveSettingsSpy).not.toHaveBeenCalled();
+      expect(resetSettingsSpy).not.toHaveBeenCalled();
+      expect(screen.queryByTestId("reset-modal")).not.toBeInTheDocument();
+    });
+
    it("should call handleCaptureConsent with true if the save is successful", async () => {
      const user = userEvent.setup();
      const handleCaptureConsentSpy = vi.spyOn(
@@ -919,5 +1044,18 @@ describe("Settings Screen", () => {
      );
    });

+    it("should not submit the unwanted fields when resetting", async () => {
+      const user = userEvent.setup();
+      renderSettingsScreen();
+      // Open the reset confirmation modal from the settings screen
+      const resetButton = await screen.findByText("BUTTON$RESET_TO_DEFAULTS");
+      await user.click(resetButton);
+      // Confirm the reset from inside the modal
+      const modal = await screen.findByTestId("reset-modal");
+      const confirmButton = within(modal).getByText("Reset");
+      await user.click(confirmButton);
+      expect(saveSettingsSpy).not.toHaveBeenCalled(); // reset must not go through saveSettings
+      expect(resetSettingsSpy).toHaveBeenCalled(); // resetSettings is the call expected instead
+    });
  });
 });
--- a/frontend/tests/services/observations.test.ts
+++ b/frontend/tests/services/observations.test.ts
@@ -48,4 +48,4 @@ describe("Observations Service", () => {
      });
    });
  });
-});
+});
--- a/Show More
+++ b/Show More