Update evaluation scripts to support both Poetry and UV

This PR updates all evaluation shell scripts to use a configurable package runner that supports both Poetry and UV. Changes: (1) evaluation/utils/version_control.sh: add a get_pkg_run() helper function that returns 'uv run' if USE_UV=1, otherwise 'poetry run'; (2) all evaluation scripts: replace the hardcoded 'poetry run' with a $PKG_RUN variable that uses the helper function. Usage: default (Poetry): ./run_infer.sh ...; with UV: USE_UV=1 ./run_infer.sh ... This is a non-breaking change: all scripts default to Poetry behavior. Closes #12421 (partial). Co-authored-by: openhands <openhands@all-hands.dev>
Update runtime build code to support both lock files
2026-04-29 03:00:45 -04:00 · 2026-01-14 23:50:52 +00:00 · 2026-01-14 23:48:03 +00:00 · 2026-01-14 23:45:36 +00:00 · 2026-01-14 21:55:07 +00:00 · 2026-01-14 21:53:03 +00:00
55 changed files with 11053 additions and 104 deletions
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -26,6 +26,9 @@ jobs:
        with:
          poetry-version: 2.1.3

+      - name: Install UV
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
+
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
--- a/.github/workflows/ghcr-build.yml
+++ b/.github/workflows/ghcr-build.yml
@@ -116,6 +116,8 @@ jobs:
        uses: docker/setup-buildx-action@v3
      - name: Install poetry via pipx
        run: pipx install poetry
+      - name: Install UV
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Set up Python
        uses: useblacksmith/setup-python@v6
        with:
--- a/.github/workflows/py-tests.yml
+++ b/.github/workflows/py-tests.yml
@@ -42,6 +42,8 @@ jobs:
          node-version: "22.x"
      - name: Install poetry via pipx
        run: pipx install poetry
+      - name: Install UV
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Set up Python
        uses: useblacksmith/setup-python@v6
        with:
@@ -81,6 +83,8 @@ jobs:
      - uses: actions/checkout@v4
      - name: Install poetry via pipx
        run: pipx install poetry
+      - name: Install UV
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Set up Python
        uses: useblacksmith/setup-python@v6
        with:
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@@ -32,6 +32,8 @@ jobs:
        with:
          virtualenvs-in-project: true
          virtualenvs-path: ~/.virtualenvs
+      - name: Install UV
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh && echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Install Poetry Dependencies
        run: poetry install --no-interaction --no-root
      - name: Build poetry project
--- a/74
+++ b/74
@@ -14,6 +14,23 @@ PRE_COMMIT_CONFIG_PATH = "./dev_config/python/.pre-commit-config.yaml"
 PYTHON_VERSION = 3.12
 KIND_CLUSTER_NAME = "local-hands"

+# Package manager selection: "uv" or "poetry" (default: poetry for backward compatibility)
+# Set USE_UV=1 to use UV instead of Poetry
+USE_UV ?= 0
+ifeq ($(USE_UV),1)
+PKG_MANAGER = uv
+PKG_RUN = uv run
+PKG_INSTALL = uv sync
+PKG_INSTALL_GROUPS = --group dev --group test --group runtime
+PKG_INSTALL_ONLY_PREFIX = --only-group
+else
+PKG_MANAGER = poetry
+PKG_RUN = poetry run
+PKG_INSTALL = poetry install
+PKG_INSTALL_GROUPS = --with dev,test,runtime
+PKG_INSTALL_ONLY_PREFIX = --only
+endif
+
 # ANSI color codes
 GREEN=$(shell tput -Txterm setaf 2)
 YELLOW=$(shell tput -Txterm setaf 3)
@@ -40,7 +57,7 @@ check-dependencies:
 ifeq ($(INSTALL_DOCKER),)
 	@$(MAKE) -s check-docker
 endif
-	@$(MAKE) -s check-poetry
+	@$(MAKE) -s check-pkg-manager
 	@$(MAKE) -s check-tmux
 	@echo "$(GREEN)Dependencies checked successfully.$(RESET)"

@@ -116,13 +133,24 @@ check-tmux:
 		echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
 	fi

-check-poetry:
+check-pkg-manager:
+ifeq ($(USE_UV),1)
+	@echo "$(YELLOW)Checking UV installation...$(RESET)"
+	@if command -v uv > /dev/null; then \
+		echo "$(BLUE)$$(uv --version) is already installed.$(RESET)"; \
+	else \
+		echo "$(RED)UV is not installed. You can install UV by running:"; \
+		echo "$(RED) curl -LsSf https://astral.sh/uv/install.sh | sh$(RESET)"; \
+		echo "$(RED)More detail here: https://docs.astral.sh/uv/getting-started/installation/$(RESET)"; \
+		exit 1; \
+	fi
+else
 	@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
 	@if command -v poetry > /dev/null; then \
-		POETRY_VERSION=$(shell poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
+		POETRY_VERSION=$$(poetry --version 2>&1 | sed -E 's/Poetry \(version ([0-9]+\.[0-9]+\.[0-9]+)\)/\1/'); \
 		IFS='.' read -r -a POETRY_VERSION_ARRAY <<< "$$POETRY_VERSION"; \
 		if [ $${POETRY_VERSION_ARRAY[0]} -gt 1 ] || ([ $${POETRY_VERSION_ARRAY[0]} -eq 1 ] && [ $${POETRY_VERSION_ARRAY[1]} -ge 8 ]); then \
-			echo "$(BLUE)$(shell poetry --version) is already installed.$(RESET)"; \
+			echo "$(BLUE)$$(poetry --version) is already installed.$(RESET)"; \
 		else \
 			echo "$(RED)Poetry 1.8 or later is required. You can install poetry by running the following command, then adding Poetry to your PATH:"; \
 			echo "$(RED) curl -sSL https://install.python-poetry.org | python$(PYTHON_VERSION) -$(RESET)"; \
@@ -135,6 +163,10 @@ check-poetry:
 		echo "$(RED)More detail here: https://python-poetry.org/docs/#installing-with-the-official-installer$(RESET)"; \
 		exit 1; \
 	fi
+endif
+
+# Legacy alias for backward compatibility
+check-poetry: check-pkg-manager

 install-python-dependencies:
 	@echo "$(GREEN)Installing Python dependencies...$(RESET)"
@@ -142,6 +174,21 @@ install-python-dependencies:
 		echo "Defaulting TZ (timezone) to UTC"; \
 		export TZ="UTC"; \
 	fi
+ifeq ($(USE_UV),1)
+	@echo "$(BLUE)Using UV for dependency management$(RESET)"
+	@if [ "$(shell uname)" = "Darwin" ]; then \
+		echo "$(BLUE)Installing chroma-hnswlib...$(RESET)"; \
+		export HNSWLIB_NO_NATIVE=1; \
+		uv venv --allow-existing --python $(PYTHON_VERSION) && uv pip install chroma-hnswlib; \
+	fi
+	@if [ -n "${DEP_GROUP}" ]; then \
+		echo "Installing only DEP_GROUP=${DEP_GROUP}"; \
+		uv sync --only-group $${DEP_GROUP}; \
+	else \
+		uv sync --group dev --group test --group runtime; \
+	fi
+else
+	@echo "$(BLUE)Using Poetry for dependency management$(RESET)"
 	poetry env use python$(PYTHON_VERSION)
 	@if [ "$(shell uname)" = "Darwin" ]; then \
 		echo "$(BLUE)Installing chroma-hnswlib...$(RESET)"; \
@@ -154,15 +201,16 @@ install-python-dependencies:
 	else \
 		poetry install --with dev,test,runtime; \
 	fi
+endif
 	@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
 		if [ -f "/etc/manjaro-release" ]; then \
 			echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
-			poetry run pip install playwright; \
-			poetry run playwright install chromium; \
+			$(PKG_RUN) pip install playwright; \
+			$(PKG_RUN) playwright install chromium; \
 		else \
 			if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
 				echo "Running playwright install --with-deps chromium..."; \
-				poetry run playwright install --with-deps chromium; \
+				$(PKG_RUN) playwright install --with-deps chromium; \
 				mkdir -p cache; \
 				touch cache/playwright_chromium_is_installed.txt; \
 			else \
@@ -182,15 +230,15 @@ install-frontend-dependencies: check-npm check-nodejs
 	@cd frontend && npm install
 	@echo "$(GREEN)Frontend dependencies installed successfully.$(RESET)"

-install-pre-commit-hooks: check-python check-poetry install-python-dependencies
+install-pre-commit-hooks: check-python check-pkg-manager install-python-dependencies
 	@echo "$(YELLOW)Installing pre-commit hooks...$(RESET)"
 	@git config --unset-all core.hooksPath || true
-	@poetry run pre-commit install --config $(PRE_COMMIT_CONFIG_PATH)
+	@$(PKG_RUN) pre-commit install --config $(PRE_COMMIT_CONFIG_PATH)
 	@echo "$(GREEN)Pre-commit hooks installed successfully.$(RESET)"

 lint-backend: install-pre-commit-hooks
 	@echo "$(YELLOW)Running linters...$(RESET)"
-	@poetry run pre-commit run --all-files --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
+	@$(PKG_RUN) pre-commit run --all-files --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)

 lint-frontend: install-frontend-dependencies
 	@echo "$(YELLOW)Running linters for frontend...$(RESET)"
@@ -248,7 +296,7 @@ build-frontend:
 # Start backend
 start-backend:
 	@echo "$(YELLOW)Starting backend...$(RESET)"
-	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "./workspace"
+	@$(PKG_RUN) uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) --reload --reload-exclude "./workspace"

 # Start frontend
 start-frontend:
@@ -270,7 +318,7 @@ _run_setup:
 	fi
 	@mkdir -p logs
 	@echo "$(YELLOW)Starting backend server...$(RESET)"
-	@poetry run uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) &
+	@$(PKG_RUN) uvicorn openhands.server.listen:app --host $(BACKEND_HOST) --port $(BACKEND_PORT) &
 	@echo "$(YELLOW)Waiting for the backend to start...$(RESET)"
 	@until nc -z localhost $(BACKEND_PORT); do sleep 0.1; done
 	@echo "$(GREEN)Backend started successfully.$(RESET)"
@@ -367,5 +415,5 @@ help:
 	@echo "  $(GREEN)help$(RESET)                - Display this help message, providing information on available targets."

 # Phony targets
-.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help
+.PHONY: build check-dependencies check-system check-python check-npm check-nodejs check-docker check-pkg-manager check-poetry install-python-dependencies install-frontend-dependencies install-pre-commit-hooks lint-backend lint-frontend lint test-frontend test build-frontend start-backend start-frontend _run_setup run run-wsl setup-config setup-config-prompts setup-config-basic openhands-cloud-run docker-dev docker-run clean help
 .PHONY: kind
--- a/containers/app/Dockerfile
+++ b/containers/app/Dockerfile
@@ -15,6 +15,9 @@ FROM base AS backend-builder
 WORKDIR /app
 ENV PYTHONPATH='/app'

+# Package manager selection: set USE_UV=1 to use UV instead of Poetry
+ARG USE_UV=0
+
 ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
@@ -22,11 +25,21 @@ ENV POETRY_NO_INTERACTION=1 \

 RUN apt-get update -y \
    && apt-get install -y curl make git build-essential jq gettext \
-    && python3 -m pip install poetry --break-system-packages
+    && python3 -m pip install poetry --break-system-packages \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh

-COPY pyproject.toml poetry.lock ./
+# Copy both lock files for flexibility
+COPY pyproject.toml poetry.lock uv.lock ./
 RUN touch README.md
-RUN export POETRY_CACHE_DIR && poetry install --no-root && rm -rf $POETRY_CACHE_DIR
+
+# Install dependencies only (not the project itself) from the lock file of the selected package manager
+RUN if [ "$USE_UV" = "1" ]; then \
+        echo "Installing dependencies with UV..." && \
+        /root/.local/bin/uv sync --frozen --no-dev --no-install-project; \
+    else \
+        echo "Installing dependencies with Poetry..." && \
+        export POETRY_CACHE_DIR && poetry install --no-root && rm -rf $POETRY_CACHE_DIR; \
+    fi

 FROM base AS openhands-app

@@ -76,7 +89,7 @@ COPY --chown=openhands:openhands --chmod=770 --from=backend-builder ${VIRTUAL_EN
 COPY --chown=openhands:openhands --chmod=770 ./skills ./skills
 COPY --chown=openhands:openhands --chmod=770 ./openhands ./openhands
 COPY --chown=openhands:openhands --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
-COPY --chown=openhands:openhands pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
+COPY --chown=openhands:openhands pyproject.toml poetry.lock uv.lock README.md MANIFEST.in LICENSE ./

 # This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
 RUN python openhands/core/download.py # No-op to download assets
--- a/containers/dev/Dockerfile
+++ b/containers/dev/Dockerfile
@@ -69,6 +69,10 @@ RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
 RUN curl -fsSL https://install.python-poetry.org | python3.12 - \
    && ln -s ~/.local/bin/poetry /usr/local/bin/poetry

+# UV (alternative package manager)
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && ln -s ~/.local/bin/uv /usr/local/bin/uv
+
 #
 RUN <<EOF
 #!/bin/bash
@@ -80,9 +84,10 @@ gh --version | head -n 1
 git --version
 #
 python --version
-echo node `node --version`
-echo npm `npm --version`
+echo node \`node --version\`
+echo npm \`npm --version\`
 poetry --version
+uv --version
 netcat -h 2>&1 | head -n 1
 " > /version.sh
 chmod a+x /version.sh
--- a/evaluation/benchmarks/EDA/scripts/run_infer.sh
+++ b/evaluation/benchmarks/EDA/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -40,7 +43,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "DATASET: $DATASET"

-COMMAND="poetry run python evaluation/benchmarks/EDA/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/EDA/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --dataset $DATASET \
--- a/evaluation/benchmarks/agent_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/agent_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -26,7 +29,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/agent_bench/run_infer.py \
+COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/agent_bench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 30 \
--- a/evaluation/benchmarks/aider_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/aider_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -39,7 +42,7 @@ if [ "$USE_UNIT_TESTS" = true ]; then
  EVAL_NOTE=$EVAL_NOTE-w-test
 fi

-COMMAND="export PYTHONPATH=evaluation/benchmarks/aider_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/aider_bench/run_infer.py \
+COMMAND="export PYTHONPATH=evaluation/benchmarks/aider_bench:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/aider_bench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 30 \
--- a/evaluation/benchmarks/algotune/scripts/run_infer.sh
+++ b/evaluation/benchmarks/algotune/scripts/run_infer.sh
@@ -2,10 +2,13 @@
 set -eo pipefail

+source "evaluation/utils/version_control.sh"
+
+# Get package runner (poetry run or uv run based on USE_UV env var) before its first use below
+PKG_RUN=$(get_pkg_run)
+
 # Generate the tasks
-poetry run python evaluation/benchmarks/algotune/adapter/run_adapter.py --output-path evaluation/benchmarks/algotune/tasks
+$PKG_RUN python evaluation/benchmarks/algotune/adapter/run_adapter.py --output-path evaluation/benchmarks/algotune/tasks

-source "evaluation/utils/version_control.sh"
-
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -59,7 +62,7 @@ fi
 echo "ENABLE_VOLUMES: $ENABLE_VOLUMES"

 # Construct the command
-COMMAND="poetry run python evaluation/benchmarks/algotune/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/algotune/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --optim_task $OPTIM_TASK \
--- a/evaluation/benchmarks/biocoder/scripts/run_infer.sh
+++ b/evaluation/benchmarks/biocoder/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -28,7 +31,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "DATASET: $DATASET"

-COMMAND="poetry run python evaluation/benchmarks/biocoder/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/biocoder/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/bird/scripts/run_infer.sh
+++ b/evaluation/benchmarks/bird/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -26,7 +29,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/bird/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 5 \
--- a/evaluation/benchmarks/browsing_delegation/scripts/run_infer.sh
+++ b/evaluation/benchmarks/browsing_delegation/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -28,7 +31,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"

 EVAL_NOTE="$OPENHANDS_VERSION"

-COMMAND="poetry run python evaluation/benchmarks/browsing_delegation/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/browsing_delegation/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 1 \
--- a/evaluation/benchmarks/commit0/scripts/run_infer.sh
+++ b/evaluation/benchmarks/commit0/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 REPO_SPLIT=$1
 MODEL_CONFIG=$2
 COMMIT_HASH=$3
@@ -84,7 +87,7 @@ fi

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/commit0/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/commit0/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/discoverybench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/discoverybench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -29,7 +32,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/discoverybench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/gaia/scripts/run_infer.sh
+++ b/evaluation/benchmarks/gaia/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -36,7 +39,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "LEVELS: $LEVELS"

-COMMAND="poetry run python ./evaluation/benchmarks/gaia/run_infer.py \
+COMMAND="$PKG_RUN python ./evaluation/benchmarks/gaia/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 60 \
--- a/evaluation/benchmarks/gorilla/scripts/run_infer.sh
+++ b/evaluation/benchmarks/gorilla/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -33,7 +36,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "HUBS: $HUBS"

-COMMAND="poetry run python evaluation/benchmarks/gorilla/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/gorilla/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 30 \
--- a/evaluation/benchmarks/gpqa/scripts/run_infer.sh
+++ b/evaluation/benchmarks/gpqa/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 EVAL_LIMIT=$3
@@ -33,7 +36,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/gpqa/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/humanevalfix/scripts/run_infer.sh
+++ b/evaluation/benchmarks/humanevalfix/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -64,7 +67,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/humanevalfix/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 PROCESS_FILEPATH=$1
 if [ -z "$PROCESS_FILEPATH" ]; then
    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
@@ -21,7 +24,7 @@ if [ -n "$EXP_NAME" ]; then
 fi

 function run_eval() {
-  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
+  COMMAND="$PKG_RUN python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
    --predictions-path $PROCESS_FILEPATH "

  echo "RUNNING: $COMMAND"
--- a/evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
+++ b/evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1

 get_openhands_version
@@ -16,7 +19,7 @@ if [ -n "$EXP_NAME" ]; then
 fi

 function run_eval() {
-  COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
+  COMMAND="$PKG_RUN python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
    --llm-config $MODEL_CONFIG "

  # Run the command
--- a/evaluation/benchmarks/logic_reasoning/scripts/run_infer.sh
+++ b/evaluation/benchmarks/logic_reasoning/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 DATASET=$2
 COMMIT_HASH=$3
@@ -34,7 +37,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/logic_reasoning/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --dataset $DATASET \
--- a/evaluation/benchmarks/miniwob/scripts/run_infer.sh
+++ b/evaluation/benchmarks/miniwob/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 # configure browsing agent
 export USE_NAV="false"
 export USE_CONCISE_ANSWER="true"
@@ -33,7 +36,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"

 EVAL_NOTE="${OPENHANDS_VERSION}_${NOTE}"

-COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && poetry run python evaluation/benchmarks/miniwob/run_infer.py \
+COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && $PKG_RUN python evaluation/benchmarks/miniwob/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/mint/scripts/run_infer.sh
+++ b/evaluation/benchmarks/mint/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 SUBSET=$3
@@ -25,7 +28,7 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"

 export PYTHONPATH=$(pwd)

-COMMAND="poetry run python ./evaluation/mint/run_infer.py \
+COMMAND="$PKG_RUN python ./evaluation/mint/run_infer.py \
    --llm-config $MODEL_CONFIG \
    --max-iterations 5 \
    --max-propose-solution 2 \
--- a/evaluation/benchmarks/ml_bench/scripts/run_analysis.sh
+++ b/evaluation/benchmarks/ml_bench/scripts/run_analysis.sh
@@ -1,5 +1,8 @@
 #!/usr/bin/env bash

+# Package manager runner: keep a preset PKG_RUN, else use UV if USE_UV=1, otherwise Poetry
+if [ "${USE_UV:-0}" = "1" ]; then PKG_RUN=${PKG_RUN:-uv run}; else PKG_RUN=${PKG_RUN:-poetry run}; fi
+
 RESULT_FILE=$1
 MODEL_CONFIG=$2

@@ -17,7 +20,7 @@ fi
 echo "MODEL_CONFIG: $MODEL_CONFIG"
 echo "RESULT_FILE: $RESULT_FILE"

-COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_analysis.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/ml_bench/run_analysis.py \
  --llm-config $MODEL_CONFIG \
  --json_file_path $RESULT_FILE"

--- a/evaluation/benchmarks/ml_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/ml_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 SPLIT=$3
@@ -32,7 +35,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/ml_bench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 10 \
--- a/evaluation/benchmarks/multi_swe_bench/scripts/rollout_multi_swegym.sh
+++ b/evaluation/benchmarks/multi_swe_bench/scripts/rollout_multi_swegym.sh
@@ -29,7 +29,11 @@ DATASET="${EVAL_DATASET%.jsonl}_with_runtime_.jsonl"  # path to converted datase

+# Get package runner (poetry run or uv run based on USE_UV env var) before its first use below
+source "evaluation/utils/version_control.sh"
+PKG_RUN=$(get_pkg_run)
+
 # Create the converted dataset file
 echo "Creating converted dataset at: $DATASET"
-poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py --input "$EVAL_DATASET" --output "$DATASET"
+$PKG_RUN python ./evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py --input "$EVAL_DATASET" --output "$DATASET"

 SPLIT="train"
 export LANGUAGE=java
@@ -45,6 +49,5 @@ fi

 # ===== Run inference =====
-source "evaluation/utils/version_control.sh"
 get_openhands_version

 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
@@ -64,7 +67,7 @@ function run_eval() {
  export LANGUAGE=java
  echo "About to run command"
  COMMAND="EVAL_DOCKER_IMAGE_PREFIX=$EVAL_DOCKER_IMAGE_PREFIX; LANGUAGE=java;
-    poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
+    $PKG_RUN python evaluation/benchmarks/multi_swe_bench/run_infer.py \
    --agent-cls CodeActAgent \
    --llm-config $MODEL \
    --max-iterations $MAX_ITER \
@@ -90,7 +93,7 @@ function run_eval() {
 for run_idx in $(seq 1 $N_RUNS); do
    if [ -n "$SKIP_IDS_THRESHOLD" ]; then
        echo "Computing SKIP_IDS for run $run_idx..."
-        SKIP_CMD="poetry run python evaluation/benchmarks/multi_swe_bench/compute_skip_ids.py $SKIP_IDS_THRESHOLD"
+        SKIP_CMD="$PKG_RUN python evaluation/benchmarks/multi_swe_bench/compute_skip_ids.py $SKIP_IDS_THRESHOLD"
        if [ -n "$SKIP_IDS_PATTERN" ]; then
            SKIP_CMD="$SKIP_CMD --pattern \"$SKIP_IDS_PATTERN\""
        fi
@@ -150,8 +153,8 @@ for run_idx in $(seq 1 $N_RUNS); do
        echo "### Evaluating on $OUTPUT_FILE ... ###"
        OUTPUT_CONFIG_FILE="${OUTPUT_FILE%.jsonl}_config.json"
        export EVAL_SKIP_BUILD_ERRORS=true
-        COMMAND="poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/eval/update_multi_swe_bench_config.py --input $OUTPUT_FILE --output $OUTPUT_CONFIG_FILE --dataset $EVAL_DATASET;
-        poetry run python -m multi_swe_bench.harness.run_evaluation --config $OUTPUT_CONFIG_FILE
+        COMMAND="$PKG_RUN python ./evaluation/benchmarks/multi_swe_bench/scripts/eval/update_multi_swe_bench_config.py --input $OUTPUT_FILE --output $OUTPUT_CONFIG_FILE --dataset $EVAL_DATASET;
+        $PKG_RUN python -m multi_swe_bench.harness.run_evaluation --config $OUTPUT_CONFIG_FILE
        "

        echo "Running command: $COMMAND"
@@ -170,10 +173,10 @@ for run_idx in $(seq 1 $N_RUNS); do

    # update the output with evaluation results
    echo "### Updating the output with evaluation results... ###"
-    poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
+    $PKG_RUN python evaluation/benchmarks/multi_swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE

    echo "### Combining the final completions... ###"
-    poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
+    $PKG_RUN python evaluation/benchmarks/multi_swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE

    echo "### DONE for run $run_idx! ###"
    echo "You can find the final output at $(dirname $OUTPUT_FILE)/$FINAL_OUTPUT_FILE"
--- a/evaluation/benchmarks/multi_swe_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/multi_swe_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -115,7 +118,7 @@ fi

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/multi_swe_bench/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/nocode_bench/scripts/run_infer_nc.sh
+++ b/evaluation/benchmarks/nocode_bench/scripts/run_infer_nc.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -103,7 +106,7 @@ fi

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/nocode_bench/run_infer_nc.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/nocode_bench/run_infer_nc.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/scienceagentbench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/scienceagentbench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 USE_KNOWLEDGE=$3
@@ -32,7 +35,7 @@ echo "AGENT: $AGENT"
 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"

-COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/scienceagentbench/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --use-knowledge $USE_KNOWLEDGE \
--- a/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_folder_to_swebench_submission.sh
@@ -1,11 +1,14 @@
 #!/usr/bin/env bash

+
+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
 FOLDER_PATH=$1
 NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
 mkdir -p $NEW_FOLDER_PATH

 # Build all_preds.jsonl
-poetry run python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
+$PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
 mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl

 # Build trajs/
--- a/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/eval_infer.sh
@@ -1,5 +1,8 @@
 #!/usr/bin/env bash

+
+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
 PROCESS_FILEPATH=$1
 if [ -z "$PROCESS_FILEPATH" ]; then
    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
@@ -66,7 +69,7 @@ else

    # ==== Convert OH format to SWE-bench format ====
    echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
-    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
+    $PKG_RUN python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
    # replace .jsonl with .swebench.jsonl in filename
    SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
    echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
@@ -97,7 +100,7 @@ if [ -z "$INSTANCE_ID" ]; then
    # Default to SWE-Bench-lite
    # change `--dataset_name` and `--split` to alter dataset

-    poetry run python -m swebench.harness.run_evaluation \
+    $PKG_RUN python -m swebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path $SWEBENCH_FORMAT_JSONL \
@@ -140,11 +143,11 @@ if [ -z "$INSTANCE_ID" ]; then
        mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
    fi

-    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
+    $PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH

 else
    echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
-    poetry run python -m swebench.harness.run_evaluation \
+    $PKG_RUN python -m swebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path $SWEBENCH_FORMAT_JSONL \
--- a/evaluation/benchmarks/swe_bench/scripts/rollout_swegym.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/rollout_swegym.sh
@@ -35,6 +35,9 @@ MAX_ITER=100

 # ===== Run inference =====
 source "evaluation/utils/version_control.sh"
+
+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
 get_openhands_version

 echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
@@ -51,7 +54,7 @@ EVAL_NOTE="$OPENHANDS_VERSION-no-hint-$EXP_NAME"

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer.py \
    --agent-cls CodeActAgent \
    --llm-config $MODEL \
    --max-iterations $MAX_ITER \
@@ -97,7 +100,7 @@ for run_idx in $(seq 1 $N_RUNS); do

    while true; do
        echo "### Evaluating on $OUTPUT_FILE ... ###"
-        COMMAND="poetry run python evaluation/benchmarks/swe_bench/eval_infer.py \
+        COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/eval_infer.py \
        --eval-num-workers $((N_WORKERS * 2)) \
        --input-file $OUTPUT_FILE \
        --dataset $DATASET \
@@ -123,10 +126,10 @@ for run_idx in $(seq 1 $N_RUNS); do

    # update the output with evaluation results
    echo "### Updating the output with evaluation results... ###"
-    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
+    $PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE

    echo "### Combining the final completions... ###"
-    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
+    $PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE

    echo "### DONE for run $run_idx! ###"
    echo "You can find the final output at $(dirname $OUTPUT_FILE)/$FINAL_OUTPUT_FILE"
--- a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -103,7 +106,7 @@ fi

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/swe_bench/scripts/run_infer_interact.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/run_infer_interact.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -91,7 +94,7 @@ fi

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer_interact.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_infer_interact.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/swe_bench/scripts/run_localize.sh
+++ b/evaluation/benchmarks/swe_bench/scripts/run_localize.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -77,7 +80,7 @@ fi

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_localize.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swe_bench/run_localize.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/swe_perf/scripts/run_infer.sh
+++ b/evaluation/benchmarks/swe_perf/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -103,7 +106,7 @@ fi

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/swe_perf/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swe_perf/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/swefficiency/scripts/run_infer.sh
+++ b/evaluation/benchmarks/swefficiency/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -107,7 +110,7 @@ export NO_CHANGE_TIMEOUT_SECONDS=900 # 15 minutes

 function run_eval() {
  local eval_note="${1}"
-  COMMAND="poetry run python evaluation/benchmarks/swefficiency/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/swefficiency/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/testgeneval/scripts/eval/convert_oh_folder_to_swebench_submission.sh
+++ b/evaluation/benchmarks/testgeneval/scripts/eval/convert_oh_folder_to_swebench_submission.sh
@@ -1,11 +1,14 @@
 #!/bin/bash

+
+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
 FOLDER_PATH=$1
 NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
 mkdir -p $NEW_FOLDER_PATH

 # Build all_preds.jsonl
-poetry run python evaluation/testgeneval/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
+$PKG_RUN python evaluation/benchmarks/testgeneval/scripts/eval/convert_oh_output_to_swe_json.py $FOLDER_PATH/output.jsonl
 mv $FOLDER_PATH/output.swebench.jsonl $NEW_FOLDER_PATH/all_preds.jsonl

 # Build trajs/
--- a/evaluation/benchmarks/testgeneval/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/testgeneval/scripts/eval_infer.sh
@@ -1,6 +1,9 @@
 #!/bin/bash
 set -eo pipefail

+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
+
 INPUT_FILE=$1
 NUM_WORKERS=$2
 DATASET=$3
@@ -29,7 +32,7 @@ fi

 echo "... Evaluating on $INPUT_FILE ..."

-COMMAND="poetry run python evaluation/benchmarks/testgeneval/eval_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/testgeneval/eval_infer.py \
  --eval-num-workers $NUM_WORKERS \
  --input-file $INPUT_FILE \
  --dataset $DATASET \
@@ -50,4 +53,4 @@ echo $COMMAND
 eval $COMMAND

 # update the output with evaluation results
-# poetry run python evaluation/benchmarks/testgeneval/scripts/eval/update_output_with_eval.py $INPUT_FILE
+# $PKG_RUN python evaluation/benchmarks/testgeneval/scripts/eval/update_output_with_eval.py $INPUT_FILE
--- a/evaluation/benchmarks/testgeneval/scripts/run_infer.sh
+++ b/evaluation/benchmarks/testgeneval/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -85,7 +88,7 @@ fi

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/testgeneval/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/testgeneval/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/the_agent_company/scripts/run_infer.sh
+++ b/evaluation/benchmarks/the_agent_company/scripts/run_infer.sh
@@ -1,5 +1,8 @@
 #!/usr/bin/env bash

+
+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
 ##################################################################################################
 # Adapted from https://github.com/TheAgentCompany/TheAgentCompany/blob/main/evaluation/run_eval.sh
 ##################################################################################################
@@ -145,7 +148,7 @@ while IFS= read -r task_image; do
    docker pull $task_image

    # Build the Python command
-    COMMAND="poetry run python -m evaluation.benchmarks.the_agent_company.run_infer \
+    COMMAND="$PKG_RUN python -m evaluation.benchmarks.the_agent_company.run_infer \
            --agent-llm-config \"$AGENT_LLM_CONFIG\" \
            --env-llm-config \"$ENV_LLM_CONFIG\" \
            --outputs-path \"$OUTPUTS_PATH\" \
--- a/evaluation/benchmarks/toolqa/scripts/run_infer.sh
+++ b/evaluation/benchmarks/toolqa/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -47,7 +50,7 @@ echo "DATASET: $DATASET"
 echo "HARDNESS: $HARDNESS"
 echo "WOLFRAM_APPID: $WOLFRAM_APPID"

-COMMAND="poetry run python evaluation/benchmarks/toolqa/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/toolqa/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 30 \
--- a/evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/eval_infer.sh
@@ -1,5 +1,8 @@
 #!/bin/bash

+
+# Package manager runner - honors a caller-provided PKG_RUN; otherwise uses UV if USE_UV=1, else Poetry
+PKG_RUN=${PKG_RUN:-$([ "${USE_UV:-0}" = "1" ] && echo "uv run" || echo "poetry run")}
 PROCESS_FILEPATH=$1
 if [ -z "$PROCESS_FILEPATH" ]; then
    echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
@@ -58,7 +61,7 @@ else

    # ==== Convert OH format to SWE-bench format ====
    echo "Merged output file with fine-grained report will be saved to $FILE_DIR"
-    poetry run python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
+    $PKG_RUN python3 evaluation/benchmarks/swe_bench/scripts/eval/convert_oh_output_to_swe_json.py $PROCESS_FILEPATH
    # replace .jsonl with .swebench.jsonl in filename
    SWEBENCH_FORMAT_JSONL=${PROCESS_FILEPATH/.jsonl/.swebench.jsonl}
    echo "SWEBENCH_FORMAT_JSONL: $SWEBENCH_FORMAT_JSONL"
@@ -83,7 +86,7 @@ if [ -z "$INSTANCE_ID" ]; then
    # Default to SWE-Bench-lite
    # change `--dataset_name` and `--split` to alter dataset

-    poetry run python -m visualswebench.harness.run_evaluation \
+    $PKG_RUN python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path $SWEBENCH_FORMAT_JSONL \
@@ -125,11 +128,11 @@ if [ -z "$INSTANCE_ID" ]; then
        mv $REPORT_PATH $RESULT_OUTPUT_DIR/report.json
    fi

-    poetry run python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH
+    $PKG_RUN python evaluation/benchmarks/swe_bench/scripts/eval/update_output_with_eval.py $PROCESS_FILEPATH

 else
    echo "Running SWE-bench evaluation on the instance_id: $INSTANCE_ID"
-    poetry run python -m visualswebench.harness.run_evaluation \
+    $PKG_RUN python -m visualswebench.harness.run_evaluation \
        --dataset_name "$DATASET_NAME" \
        --split "$SPLIT" \
        --predictions_path $SWEBENCH_FORMAT_JSONL \
--- a/evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh
+++ b/evaluation/benchmarks/visual_swe_bench/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
@@ -84,7 +87,7 @@ fi

 function run_eval() {
  local eval_note=$1
-  COMMAND="poetry run python evaluation/benchmarks/visual_swe_bench/run_infer.py \
+  COMMAND="$PKG_RUN python evaluation/benchmarks/visual_swe_bench/run_infer.py \
    --agent-cls $AGENT \
    --llm-config $MODEL_CONFIG \
    --max-iterations $MAX_ITER \
--- a/evaluation/benchmarks/visualwebarena/scripts/run_infer.sh
+++ b/evaluation/benchmarks/visualwebarena/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 # configure browsing agent
 export USE_NAV="true"
 export USE_CONCISE_ANSWER="true"
@@ -32,7 +35,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"

 EVAL_NOTE="${OPENHANDS_VERSION}"

-COMMAND="poetry run python evaluation/benchmarks/visualwebarena/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/visualwebarena/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 15 \
--- a/evaluation/benchmarks/webarena/scripts/run_infer.sh
+++ b/evaluation/benchmarks/webarena/scripts/run_infer.sh
@@ -3,6 +3,9 @@ set -eo pipefail

 source "evaluation/utils/version_control.sh"

+# Get package runner (poetry run or uv run based on USE_UV env var)
+PKG_RUN=$(get_pkg_run)
+
 # configure webarena websites and environment
 source evaluation/benchmarks/webarena/scripts/webarena_env.sh

@@ -35,7 +38,7 @@ echo "MODEL_CONFIG: $MODEL_CONFIG"

 EVAL_NOTE="$OPENHANDS_VERSION"

-COMMAND="poetry run python evaluation/benchmarks/webarena/run_infer.py \
+COMMAND="$PKG_RUN python evaluation/benchmarks/webarena/run_infer.py \
  --agent-cls $AGENT \
  --llm-config $MODEL_CONFIG \
  --max-iterations 15 \
--- a/evaluation/utils/version_control.sh
+++ b/evaluation/utils/version_control.sh
@@ -1,3 +1,13 @@
+# Select the command prefix used to launch Python inside the project environment.
+# Setting USE_UV=1 picks UV; any other value (or unset) keeps Poetry, easing gradual migration.
+get_pkg_run() {
+    # Prints the runner on stdout so callers can capture it with $(get_pkg_run)
+    case "${USE_UV:-0}" in
+        1) echo "uv run" ;;
+        *) echo "poetry run" ;;
+    esac
+}
+
 checkout_eval_branch() {
    if [ -z "$COMMIT_HASH" ]; then
        echo "Commit hash not specified, use current git commit"
@@ -42,5 +52,6 @@ checkout_original_branch() {
 get_openhands_version() {
    # IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
    # We need to track the version of Agent in the evaluation to make sure results are comparable
-    OPENHANDS_VERSION=v$(poetry run python -c "from openhands import get_version; print(get_version())")
+    PKG_RUN=$(get_pkg_run)  # NOTE: not declared local, so this (re)assigns the caller-visible PKG_RUN
+    OPENHANDS_VERSION=v$($PKG_RUN python -c "from openhands import get_version; print(get_version())")
 }
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@@ -289,12 +289,13 @@ def prep_build_folder(
    # Copy the 'skills' directory (Skills)
    shutil.copytree(Path(project_root, 'skills'), Path(build_folder, 'code', 'skills'))

-    # Copy pyproject.toml and poetry.lock files
-    for file in ['pyproject.toml', 'poetry.lock']:
+    # Copy pyproject.toml and lock files (poetry.lock and uv.lock if it exists)
+    for file in ['pyproject.toml', 'poetry.lock', 'uv.lock']:
        src = Path(openhands_source_dir, file)
        if not src.exists():
            src = Path(project_root, file)
-        shutil.copy2(src, Path(build_folder, 'code', file))
+        if src.exists():
+            shutil.copy2(src, Path(build_folder, 'code', file))

    # Create a Dockerfile and write it to build_folder
    dockerfile_content = _generate_dockerfile(
@@ -328,13 +329,15 @@ def get_hash_for_lock_files(base_image: str, enable_browser: bool = True) -> str
    # Only include enable_browser in hash when it's False for backward compatibility
    if not enable_browser:
        md5.update(str(enable_browser).encode())
-    for file in ['pyproject.toml', 'poetry.lock']:
+    # Include pyproject.toml and lock files (poetry.lock and uv.lock if it exists)
+    for file in ['pyproject.toml', 'poetry.lock', 'uv.lock']:
        src = Path(openhands_source_dir, file)
        if not src.exists():
            src = Path(openhands_source_dir.parent, file)
-        with open(src, 'rb') as f:
-            for chunk in iter(lambda: f.read(4096), b''):
-                md5.update(chunk)
+        if src.exists():
+            with open(src, 'rb') as f:
+                for chunk in iter(lambda: f.read(4096), b''):
+                    md5.update(chunk)
    # We get away with truncation because we want something that is unique
    # rather than something that is cryptographically secure
    result = truncate_hash(md5.hexdigest())
--- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2
+++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2
@@ -296,7 +296,7 @@ RUN /openhands/micromamba/bin/micromamba create -n openhands -y && \
    /openhands/micromamba/bin/micromamba install -n openhands -c conda-forge poetry python=3.12 -y
 USER root

-# Create a clean openhands directory including only the pyproject.toml, poetry.lock and openhands/__init__.py
+# Create a clean openhands directory including only the pyproject.toml, poetry.lock, uv.lock and openhands/__init__.py
 RUN \
    if [ -d /openhands/code ]; then rm -rf /openhands/code; fi && \
    mkdir -p /openhands/code/openhands && \
@@ -307,6 +307,8 @@ RUN \
    git config --global user.email "openhands@all-hands.dev"

 COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
+# Copy uv.lock if it exists (for future UV support)
+COPY --chown=openhands:openhands ./code/uv.lock* /openhands/code/

 {{ install_dependencies_user() }}
 {{ install_dependencies_root() }}
@@ -342,6 +344,8 @@ RUN \
 # ================================================================
 RUN if [ -d /openhands/code/openhands ]; then rm -rf /openhands/code/openhands; fi
 COPY --chown=openhands:openhands ./code/pyproject.toml ./code/poetry.lock /openhands/code/
+# Copy uv.lock if it exists (for future UV support)
+COPY --chown=openhands:openhands ./code/uv.lock* /openhands/code/
 RUN if [ -d /openhands/code/skills ]; then rm -rf /openhands/code/skills; fi
 COPY --chown=openhands:openhands ./code/skills /openhands/code/skills
 COPY --chown=openhands:openhands ./code/openhands /openhands/code/openhands
--- a/poetry.lock
+++ b/poetry.lock
@@ -1816,7 +1816,7 @@ files = [
    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "platform_system == \"Windows\" or os_name == \"nt\" or sys_platform == \"win32\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "sys_platform == \"win32\""}
+markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\" or os_name == \"nt\"", dev = "os_name == \"nt\" or sys_platform == \"win32\"", runtime = "sys_platform == \"win32\"", test = "sys_platform == \"win32\""}

 [[package]]
 name = "comm"
@@ -16846,4 +16846,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12,<3.14"
-content-hash = "78c01d3e121d5f27a4120d043408ac0aa93fa95fe0c2c7d678161b08ccb582c1"
+content-hash = "ef3a6a2526eec15650284a245d0bd0dbf764514401799fc15677d72a7a09b2de"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,174 @@
 [build-system]
 build-backend = "poetry.core.masonry.api"
-requires = [
-  "poetry-core",
+requires = [ "poetry-core" ]
+
+[project]
+name = "openhands-ai"
+version = "1.1.0"
+description = "OpenHands: Code Less, Make More"
+readme = "README.md"
+license = "MIT"
+authors = [ { name = "OpenHands", email = "contact@all-hands.dev" } ]
+requires-python = ">=3.12,<3.14"
+classifiers = [
+  "Programming Language :: Python :: 3 :: Only",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
 ]
+# Main dependencies (mirrors [tool.poetry.dependencies] for UV compatibility)
+dependencies = [
+  "aiohttp>=3.9,!=3.11.13",
+  "anthropic[vertex]",
+  "anyio==4.9",
+  "asyncpg>=0.30",
+  "bashlex>=0.18",
+  "boto3",
+  "browsergym-core==0.13.3",
+  "deprecated",
+  "deprecation>=2.1",
+  "dirhash",
+  "docker",
+  "fastapi",
+  "fastmcp>=2.12.4",
+  "google-api-python-client>=2.164",
+  "google-auth-httplib2",
+  "google-auth-oauthlib",
+  "google-cloud-aiplatform",
+  "google-genai",
+  "html2text",
+  "httpx-aiohttp>=0.1.8",
+  "ipywidgets>=8.1.5",
+  "jinja2>=3.1.6",
+  "joblib",
+  "json-repair",
+  "jupyter-kernel-gateway",
+  "kubernetes>=33.1",
+  "libtmux>=0.46.2",
+  "litellm!=1.64.4,!=1.67.*,>=1.74.3",
+  "lmnr>=0.7.20",
+  "memory-profiler>=0.61",
+  "numpy",
+  "openai==2.8",
+  "openhands-aci==0.3.2",
+  "openhands-agent-server==1.8.1",
+  "openhands-sdk==1.8.1",
+  "openhands-tools==1.8.1",
+  "opentelemetry-api>=1.33.1",
+  "opentelemetry-exporter-otlp-proto-grpc>=1.33.1",
+  "pathspec>=0.12.1",
+  "pexpect",
+  "pg8000>=1.31.5",
+  "pillow>=11.3",
+  "playwright>=1.55",
+  "poetry>=2.1.2",
+  "prompt-toolkit>=3.0.50",
+  "protobuf>=5,<6",
+  "psutil",
+  "pybase62>=1",
+  "pygithub>=2.5",
+  "pyjwt>=2.9",
+  "pylatexenc",
+  "pypdf>=6",
+  "python-docx",
+  "python-dotenv",
+  "python-frontmatter>=1.1",
+  "python-jose[cryptography]>=3.3",
+  "python-json-logger>=3.2.1",
+  "python-multipart",
+  "python-pptx",
+  "python-socketio>=5.11.4",
+  "pythonnet",
+  "pyyaml>=6.0.2",
+  "qtconsole>=5.6.1",
+  "rapidfuzz>=3.9",
+  "redis>=5.2,<7",
+  "requests>=2.32.5",
+  "setuptools>=78.1.1",
+  "shellingham>=1.5.4",
+  "sqlalchemy[asyncio]>=2.0.40",
+  "sse-starlette>=3.0.2",
+  "starlette>=0.48",
+  "tenacity>=8.5,<10",
+  "termcolor",
+  "toml",
+  "tornado>=6.5",
+  "types-toml",
+  "urllib3>=2.6.3",
+  "uvicorn",
+  "whatthepatch>=1.0.6",
+  "zope-interface==7.2",
+]
+
+optional-dependencies.third_party_runtimes = [
+  "daytona==0.24.2",
+  "e2b-code-interpreter>=2",
+  "modal>=0.66.26,<1.2",
+  "runloop-api-client==0.50",
+]
+urls.Homepage = "https://github.com/OpenHands/OpenHands"
+urls.Repository = "https://github.com/OpenHands/OpenHands"
+
+[dependency-groups]
+dev = [
+  "build",
+  "mypy==1.17",
+  "pre-commit==4.2",
+  "pytest>=8.4",
+  "pytest-asyncio>=1.3",
+  "ruff==0.12.5",
+  "types-setuptools",
+]
+test = [
+  "gevent>=24.2.1,<26",
+  "pandas",
+  "pytest",
+  "pytest-asyncio",
+  "pytest-cov",
+  "pytest-forked",
+  "pytest-playwright>=0.7",
+  "pytest-timeout>=2.4",
+  "pytest-xdist",
+  "reportlab",
+]
+runtime = [
+  "flake8",
+  "jupyterlab",
+  "notebook",
+]
+evaluation = [
+  "boto3-stubs[s3]>=1.37.19",
+  "browsergym==0.13.3",
+  "browsergym-miniwob==0.13.3",
+  "browsergym-visualwebarena==0.13.3",
+  "browsergym-webarena==0.13.3",
+  "commit0",
+  "datasets",
+  "evaluate",
+  "func-timeout",
+  "gdown",
+  "joblib",
+  "matplotlib",
+  "multi-swe-bench==0.1.2",
+  "pandas",
+  "pyarrow==21",
+  "retry",
+  "seaborn",
+  "streamlit",
+  "swebench",
+  "swegym",
+  "sympy",
+  "tabulate",
+  "visualswebench",
+  "whatthepatch",
+]
+testgeneval = [
+  "fuzzywuzzy>=0.18",
+  "python-levenshtein>=0.26.1,<0.28",
+  "rouge>=1.0.1",
+  "tree-sitter-python>=0.23.6",
+]
+
+# UV source configuration for the evaluation group's git dependencies is in [tool.uv.sources] at the end of this file

 [tool.poetry]
 name = "openhands-ai"
@@ -221,3 +387,8 @@ lint.pydocstyle.convention = "google"
 concurrency = [ "gevent" ]
 relative_files = true
 omit = [ "enterprise/tests/*", "**/test_*" ]
+
+[tool.uv.sources]
+visualswebench = { git = "https://github.com/luolin101/Visual-SWE-bench.git" }
+swegym = { git = "https://github.com/SWE-Gym/SWE-Bench-Package.git" }
+swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" }
--- a/tests/unit/runtime/builder/test_runtime_build.py
+++ b/tests/unit/runtime/builder/test_runtime_build.py
@@ -62,12 +62,15 @@ def _check_source_code_in_dir(temp_dir):
    assert os.path.exists(os.path.join(code_dir, 'pyproject.toml'))

    # The source code should only include the `openhands` folder,
-    # and pyproject.toml & poetry.lock that are needed to build the runtime image
-    assert set(os.listdir(code_dir)) == {
-        'openhands',
-        'pyproject.toml',
-        'poetry.lock',
-    }
+    # and pyproject.toml & lock files that are needed to build the runtime image
+    expected_files = {'openhands', 'pyproject.toml', 'poetry.lock'}
+    # uv.lock is optional - include it if it exists
+    if os.path.exists(os.path.join(code_dir, 'uv.lock')):
+        expected_files.add('uv.lock')
+    # skills directory may also be present
+    if os.path.exists(os.path.join(code_dir, 'skills')):
+        expected_files.add('skills')
+    assert set(os.listdir(code_dir)) == expected_files
    assert os.path.exists(os.path.join(code_dir, 'openhands'))
    assert os.path.isdir(os.path.join(code_dir, 'openhands'))

@@ -89,9 +92,11 @@ def test_prep_build_folder(temp_dir):
            extra_deps=None,
        )

-    # make sure that the code (openhands/) and microagents folder were copied
+    # make sure that the code (openhands/) and skills folder were copied
    assert shutil_mock.copytree.call_count == 2
-    assert shutil_mock.copy2.call_count == 2
+    # copy2 is called for pyproject.toml, poetry.lock, and optionally uv.lock
+    # The exact count depends on whether uv.lock exists
+    assert shutil_mock.copy2.call_count >= 2

    # Now check dockerfile is in the folder
    dockerfile_path = os.path.join(temp_dir, 'Dockerfile')
@@ -100,26 +105,35 @@ def test_prep_build_folder(temp_dir):


 def test_get_hash_for_lock_files():
-    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
+    # Mock Path.exists to return True for all files including uv.lock
+    with (
+        patch('builtins.open', mock_open(read_data='mock-data'.encode())),
+        patch.object(Path, 'exists', return_value=True),
+    ):
        hash = get_hash_for_lock_files('some_base_image', enable_browser=True)
        # Since we mocked open to always return "mock_data", the hash is the result
-        # of hashing the name of the base image followed by "mock-data" twice
+        # of hashing the name of the base image followed by "mock-data" three times
+        # (pyproject.toml, poetry.lock, uv.lock)
        md5 = hashlib.md5()
        md5.update('some_base_image'.encode())
-        for _ in range(2):
+        for _ in range(3):  # pyproject.toml, poetry.lock, uv.lock
            md5.update('mock-data'.encode())
        assert hash == truncate_hash(md5.hexdigest())


 def test_get_hash_for_lock_files_different_enable_browser():
-    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
+    # Mock Path.exists to return True for all files including uv.lock
+    with (
+        patch('builtins.open', mock_open(read_data='mock-data'.encode())),
+        patch.object(Path, 'exists', return_value=True),
+    ):
        hash_true = get_hash_for_lock_files('some_base_image', enable_browser=True)
        hash_false = get_hash_for_lock_files('some_base_image', enable_browser=False)

        # Hash with enable_browser=True should not include the enable_browser value
        md5_true = hashlib.md5()
        md5_true.update('some_base_image'.encode())
-        for _ in range(2):
+        for _ in range(3):  # pyproject.toml, poetry.lock, uv.lock
            md5_true.update('mock-data'.encode())
        expected_hash_true = truncate_hash(md5_true.hexdigest())

@@ -127,7 +141,7 @@ def test_get_hash_for_lock_files_different_enable_browser():
        md5_false = hashlib.md5()
        md5_false.update('some_base_image'.encode())
        md5_false.update('False'.encode())  # enable_browser=False is included
-        for _ in range(2):
+        for _ in range(3):  # pyproject.toml, poetry.lock, uv.lock
            md5_false.update('mock-data'.encode())
        expected_hash_false = truncate_hash(md5_false.hexdigest())

--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
openhands	8a5d8f7006	Update evaluation scripts to support both Poetry and UV This PR updates all evaluation shell scripts to use a configurable package runner that supports both Poetry and UV. Changes: - evaluation/utils/version_control.sh: Add get_pkg_run() helper function that returns 'uv run' if USE_UV=1, otherwise 'poetry run' - All evaluation scripts: Replace hardcoded 'poetry run' with $PKG_RUN variable that uses the helper function Usage: - Default (Poetry): ./run_infer.sh ... - With UV: USE_UV=1 ./run_infer.sh ... This is a non-breaking change - all scripts default to Poetry behavior. Closes #12421 (partial) Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 23:50:52 +00:00
openhands	64b1e68d2a	Update runtime build code to support both lock files This PR updates the runtime build code to copy and hash both poetry.lock and uv.lock files (when uv.lock exists). Changes: - openhands/runtime/utils/runtime_build.py: - prep_build_folder: Copy uv.lock alongside poetry.lock (if it exists) - get_hash_for_lock_files: Include uv.lock in hash calculation (if it exists) - tests/unit/runtime/builder/test_runtime_build.py: - Update _check_source_code_in_dir to handle optional uv.lock and skills - Update test_prep_build_folder to allow variable copy2 count - Update test_get_hash_for_lock_files to mock Path.exists for uv.lock - Update test_get_hash_for_lock_files_different_enable_browser similarly This is a non-breaking change - the code gracefully handles the case where uv.lock doesn't exist yet. Closes #12421 (partial) Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 23:48:03 +00:00
openhands	f2a3a0da56	Add UV installation to GitHub Actions workflows This PR adds UV installation to GitHub Actions workflows while keeping Poetry as the default package manager for backward compatibility. Changes: - py-tests.yml: Add UV installation step for both test-on-linux and test-enterprise jobs - e2e-tests.yml: Add UV installation step - ghcr-build.yml: Add UV installation step for runtime build job - pypi-release.yml: Add UV installation step All workflows continue to use Poetry for dependency management. UV is installed alongside Poetry to enable future migration and to support any UV-specific features that may be needed. This is a non-breaking change - existing workflows continue to work unchanged with Poetry. Closes #12421 (partial) Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 23:45:36 +00:00
openhands	50487f2a9c	Add UV support to Docker files alongside Poetry This PR adds UV installation to Docker files while keeping Poetry as the default for backward compatibility. Changes: - containers/app/Dockerfile: - Add USE_UV build arg (default: 0) for package manager selection - Install UV alongside Poetry - Copy both poetry.lock and uv.lock - Add conditional logic to use UV or Poetry based on USE_UV arg - containers/dev/Dockerfile: - Install UV alongside Poetry - Add UV to version.sh output - openhands/runtime/utils/runtime_templates/Dockerfile.j2: - Copy uv.lock alongside poetry.lock (using wildcard for optional file) - Update comments to mention uv.lock Usage: # Build with Poetry (default, unchanged behavior) docker build -t openhands . # Build with UV docker build --build-arg USE_UV=1 -t openhands . This is a non-breaking change - existing builds continue to work unchanged. UV can be opted into by setting USE_UV=1 build arg. Closes #12421 (partial) Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 21:55:07 +00:00
openhands	3e6c1f0d27	Add UV support to Makefile alongside Poetry This PR adds UV as an alternative package manager in the Makefile while keeping Poetry as the default for backward compatibility. Changes: - Add USE_UV variable (default: 0) to select package manager - Add PKG_MANAGER, PKG_RUN, PKG_INSTALL variables for abstraction - Add check-pkg-manager target that checks for UV or Poetry based on USE_UV - Keep check-poetry as alias for backward compatibility - Update install-python-dependencies to support both UV and Poetry - Update install-pre-commit-hooks to use PKG_RUN - Update lint-backend to use PKG_RUN - Update start-backend to use PKG_RUN - Update _run_setup to use PKG_RUN Usage: # Use Poetry (default, unchanged behavior) make build make start-backend # Use UV USE_UV=1 make build USE_UV=1 make start-backend This is a non-breaking change - existing workflows continue to work unchanged. UV can be opted into by setting USE_UV=1. Closes #12421 (partial) Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 21:53:03 +00:00
openhands	72b200d5a5	Add [project.dependencies], [project.optional-dependencies], and generate uv.lock This PR combines Steps 1.3-1.6 of the Poetry to UV migration: Step 1.3: Add [project.dependencies] - Added all main dependencies from [tool.poetry.dependencies] to [project.dependencies] - Dependencies are now readable by UV for dependency resolution Step 1.4: Add [project.optional-dependencies] - Added third_party_runtimes extras (modal, e2b, runloop, daytona) - Mirrors [tool.poetry.extras] for UV compatibility Step 1.5: Keep poetry.core build backend (intentionally not changed) - Keeping poetry.core.masonry.api to maintain Poetry compatibility - This allows both Poetry and UV to coexist without breaking changes Step 1.6: Generate uv.lock - Generated uv.lock file with 657 resolved packages - UV can now sync dependencies using: uv sync --group dev --group test UV is now testable! You can run: uv sync --group dev # Install main + dev dependencies uv run pytest tests/unit -x # Run tests with UV Poetry continues to work unchanged: poetry install --with dev,test # Still works poetry run pytest # Still works Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 18:25:37 +00:00
openhands	8968e1f691	Add PEP 735 [dependency-groups] section for UV compatibility This is Step 1.2 in migrating from Poetry to UV for dependency management. Changes: - Add [dependency-groups] section with dev, test, runtime, evaluation, and testgeneval groups - Add [tool.uv.sources] section for git dependencies (visualswebench, swegym, swebench) - Keep all existing [tool.poetry.group.*] sections intact for backward compatibility The dependency groups mirror the existing Poetry groups: - dev: Development tools (ruff, mypy, pre-commit, pytest, etc.) - test: Testing dependencies (pytest plugins, pandas, gevent, etc.) - runtime: Runtime dependencies (jupyterlab, notebook, flake8) - evaluation: Evaluation benchmark dependencies - testgeneval: Test generation evaluation dependencies Git dependencies in the evaluation group are configured via [tool.uv.sources] to maintain compatibility with UV's dependency resolution. Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 18:04:27 +00:00
openhands	86374d139d	Add PEP 621 [project] section for UV compatibility This is the first step in migrating from Poetry to UV for dependency management. Changes: - Add [project] section with standard PEP 621 metadata (name, description, readme, license, authors, requires-python, classifiers) - Add project.urls with Homepage and Repository - Mark version as dynamic to work with poetry-dynamic-versioning - Keep all existing [tool.poetry] sections intact for backward compatibility The [project] section coexists with [tool.poetry] during the migration period. Poetry will use the [project] values when both are defined, which is the expected behavior. Co-authored-by: openhands <openhands@all-hands.dev>	2026-01-14 17:40:30 +00:00